[Ocfs2-commits] mfasheh commits r1594 - branches/dlm-glue/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Oct 25 20:08:33 CDT 2004
Author: mfasheh
Date: 2004-10-25 20:08:32 -0500 (Mon, 25 Oct 2004)
New Revision: 1594
Removed:
branches/dlm-glue/src/dlm.c
branches/dlm-glue/src/dlm.h
branches/dlm-glue/src/nm.c
branches/dlm-glue/src/nm.h
branches/dlm-glue/src/vote.c
branches/dlm-glue/src/vote.h
Log:
* yoink! We won't be needing these any more...
Deleted: branches/dlm-glue/src/dlm.c
===================================================================
--- branches/dlm-glue/src/dlm.c 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/dlm.c 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,732 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlm.c
- *
- * Distributed lock manager. Requests and processes lock votes.
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ocfs_compat.h"
-
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/random.h>
-
-#include "ocfs_log.h"
-#include "ocfs.h"
-#include "ocfs2.h"
-
-#include "alloc.h"
-#include "dcache.h"
-#include "dlm.h"
-#include "inode.h"
-#include "lockres.h"
-#include "nm.h"
-#include "util.h"
-#include "vote.h"
-
-#include "ocfs_journal.h"
-#include "buffer_head_io.h"
-
-#define WAIT_FOR_VOTE_INCREMENT 200
-
-/* Tracing */
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLM
-
-/* inode is definitely non NULL */
-static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode)
-{
- int status = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- if (ocfs_node_map_is_empty(&lockres->readonly_map))
- return status;
- status = ocfs_drop_readonly_cache_lock(osb, inode, 0);
- return status;
-}
-
-/*
- * ocfs_update_disk_lock()
- * inode is definitely non NULL
- */
-void ocfs_update_disk_lock (ocfs_super * osb,
- struct buffer_head *bh,
- struct inode *inode)
-{
- ocfs2_dinode *fe = NULL;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY ();
-
- fe = (ocfs2_dinode *) bh->b_data;
-
- /* We do our own I/O here to lock out dirty readers from
- * refreshing the bh when we're in the middle of changing
- * it. We shouldn't ever get here if it's a journalled buffer
- * so io_sem is not necessary. */
- if (buffer_jbd(bh)) {
- printk("Ugh, block %llu has the JBD bit set!\n",
- (unsigned long long)bh->b_blocknr);
- BUG();
- }
-
- if ((DISK_LOCK(fe)->dl_master == lockres->master_node_num)
- && (DISK_LOCK(fe)->dl_level == lockres->lock_type))
- goto skip_write;
-
- lock_buffer(bh);
-
- if (buffer_jbd(bh)) {
- printk("Ugh, block %llu has the JBD bit set!\n",
- (unsigned long long)bh->b_blocknr);
- BUG();
- }
-
- DISK_LOCK(fe)->dl_master = lockres->master_node_num;
- DISK_LOCK(fe)->dl_level = lockres->lock_type;
-
- set_buffer_uptodate(bh);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- /*
- * mark_buffer_clean() doesn't exist in 2.6.x kernels.
- * Not many places actually used mark_buffer_clean, but
- * at least reiserfs uses clear_buffer_dirty() as
- * a replacment.
- */
- clear_buffer_dirty(bh);
-#else
- mark_buffer_clean(bh);
-#endif
- bh->b_end_io = ocfs_end_buffer_io_sync;
- submit_bh(WRITE, bh);
- wait_on_buffer(bh);
- SET_BH_SEQNUM(inode, bh);
-
-skip_write:
- LOG_EXIT ();
-} /* ocfs_update_disk_lock */
-
-int ocfs_notify_cluster(ocfs_super *osb,
- struct inode *inode,
- u32 message_flags)
-{
- int status = -EAGAIN;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- u32 flags;
-
- LOG_ENTRY_ARGS("(inode = %llu, flags = 0x%x)\n",
- OCFS_I(inode)->ip_blkno, message_flags);
-
- while (status == -EAGAIN) {
- ocfs_acquire_lockres_write(inode);
-
- flags = message_flags;
- if (ocfs_inode_is_new(osb, inode))
- flags |= FLAG_FAST_PATH_LOCK;
-
- if (ocfs_task_interruptible ()) {
- ocfs_release_lockres_write (inode);
- LOG_TRACE_ARGS("interrupted... inode = %llu\n",
- OCFS_I(inode)->ip_blkno);
- status = -EINTR;
- goto bail;
- }
-
- status = new_lock_function(osb, lockres->lock_type, flags, NULL, inode);
-
- if (status < 0) {
- if (status != -EAGAIN)
- LOG_ERROR_STATUS (status);
- ocfs_release_lockres_write (inode); // ocfs_file_open ocfs_symlink
- if (status == -EAGAIN || status == -ETIMEDOUT) {
- ocfs_sleep (50);
- status = -EAGAIN;
- continue;
- }
-
- goto bail;
- }
- ocfs_release_lockres_write (inode); // ocfs_file_open
- }
-bail:
- LOG_EXIT_STATUS (status);
- return status;
-}
-
-enum {
- invalid_path = 0,
- fast_path,
- become_master,
- get_x,
- wait_for_release,
- master_request,
- num_paths
-};
-
-static const char *lock_path_strs[] = {
- "invalid_path", "fast_path", "become_master",
- "get_x", "wait_for_release", "master_request"
-};
-
-static inline const char * lock_path_str(int lock_path);
-static inline const char * lock_path_str(int lock_path)
-{
- if (lock_path >= num_paths || lock_path <= invalid_path)
- return lock_path_strs[0];
- return lock_path_strs[lock_path];
-}
-
-/*
- * ocfs_acquire_lock()
- * inode is definitely non NULL
- */
-int ocfs_acquire_lock (ocfs_super * osb, __u32 lock_type,
- __u32 flags, struct buffer_head **bh, struct inode *inode)
-{
- int status;
- __u32 updated;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- int k = 0;
- int no_owner = 0, owner_dead = 0, wait_on_recovery = 0;
- __u32 extra_lock_flags = 0;
- __u64 lock_id;
-
- LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
- osb, lock_type, flags, bh);
-
- OCFS_ASSERT(lock_type != OCFS_LKM_NLMODE);
- OCFS_ASSERT(inode);
- OCFS_ASSERT(bh);
- OCFS_ASSERT(!journal_current_handle());
-
- lock_id = OCFS_I(inode)->ip_blkno;
- LOG_TRACE_ARGS("lock_id = %llu\n", lock_id);
-
- flags |= FLAG_ACQUIRE_LOCK;
-
- *bh = sb_getblk(osb->sb, OCFS_I(inode)->ip_blkno);
- if (*bh == NULL) {
- status = -EIO;
- LOG_ERROR_STATUS(status);
- goto finally;
- }
-
- updated = 0;
-again:
- ocfs_acquire_lockres_write (inode);
-
- LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
-
- /* if updated = 1 then we've read a valid bh so skip the
- * update_lockres if we can trust it. */
- if (updated && (lockres->master_node_num != osb->node_num))
- updated = 0;
-
- if (!updated) {
- status = ocfs_update_lockres(osb, *bh, inode, 1);
- if (status < 0) {
- ocfs_release_lockres_write (inode);
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- updated = 1;
- }
-
-reevaluate:
- no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
-
- /* master node is an invalid node */
- if (unlikely(lockres->master_node_num >= osb->max_nodes && !no_owner)) {
- LOG_ERROR_ARGS("lockres: master_node=%d, owner=%s, lockid=%llu\n",
- lockres->master_node_num, no_owner?"no":"yes",
- lock_id);
- LOG_ERROR_STATUS (status = -EINVAL);
- ocfs_release_lockres_write (inode); // ocfs_acquire_lock
- goto finally;
- }
-
- wait_on_recovery =
- ocfs_node_is_recovering(osb, lockres->master_node_num);
- owner_dead = !(no_owner ||
- ocfs_node_is_alive(&osb->publ_map,
- lockres->master_node_num));
- if (owner_dead || wait_on_recovery) {
- // if owner is dead or in recovery and the lockres
- // has the readonly flag set, clear it
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- }
-
- status = 0;
- extra_lock_flags = 0;
-
- if (flags & FLAG_READONLY) {
- if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ||
- (lockres->master_node_num == osb->node_num &&
- lockres->lock_type == OCFS_LKM_EXMODE)) {
- /* already readonly or local node is master */
- /* THIS node will see it as readonly, but OTHER
- * nodes will have to wait until lock_holders drops
- * to 0 (to finish journal flush on this inode) */
-#ifdef VERBOSE_LOCKING_TRACE
- printk("acquire_lock: lockid %llu, setting readonly\n",
- lock_id);
-#endif
- set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- goto skip_lock_write;
- }
-
- if (lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
- owner_dead || wait_on_recovery) {
- /* no master or dead master */
- extra_lock_flags = FLAG_REMASTER;
- } else {
- /* valid master, but either not cachelock or elsewhere */
- if (lockres->lock_type != OCFS_LKM_EXMODE) {
- /* treat just like a normal master change request */
- extra_lock_flags = FLAG_CHANGE_MASTER;
- }
- }
- goto do_lock;
- }
-
-#warning NEED MORE HANDLING HERE NOW FOR DROPPING LOCAL READONLY!!!
- // anything else is NOT a readonly request
- if (lockres->master_node_num != osb->node_num)
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
-
- status = ocfs_wait_for_readonly_drop(osb, inode);
- if (status < 0) {
- if (status == -EAGAIN) {
- // the rodrop thread is already running and needs the lockres
- ocfs_release_lockres_write(inode);
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- ocfs_acquire_lockres_write(inode);
- goto reevaluate;
- }
- LOG_ERROR_STATUS(status);
- goto finally;
- }
-
- if (ocfs_inode_is_new(osb, inode)) {
- if (lockres->master_node_num != osb->node_num) {
- printk("inode is new, but lockres is out of date! "
- "owner = %d, type = %d\n",
- lockres->master_node_num, lockres->lock_type);
-
- BUG();
- }
- extra_lock_flags |= FLAG_FAST_PATH_LOCK;
- }
-
- /* some lock requests are simple messages and don't require a
- * master change. */
- if (flags & FLAG_TRUNCATE_PAGES)
- goto do_lock;
-
- if ((lockres->master_node_num != osb->node_num)
- && (wait_on_recovery || no_owner || owner_dead)) {
- extra_lock_flags |= FLAG_REMASTER;
- } else if (lockres->master_node_num != osb->node_num) {
- extra_lock_flags |= FLAG_CHANGE_MASTER;
- }
-
-do_lock:
- LOG_TRACE_ARGS("lockres: master=%d, locktype=%d, flags: %08x\n",
- lockres->master_node_num, lockres->lock_type,
- flags|extra_lock_flags);
-
-#ifdef VERBOSE_LOCKING_TRACE
- printk("acquire_lock: lockid=%llu, this=%d, master=%d, locktype=%d, "
- "flags=%08x, readonly=%s\n", lock_id, osb->node_num,
- lockres->master_node_num, lockres->lock_type, flags|extra_lock_flags,
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
-#endif
- if (wait_on_recovery
- && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
- int waitcnt = 0;
- LOG_TRACE_ARGS("Waiting on node %u to be recovered\n",
- lockres->master_node_num);
- while (1) {
- LOG_TRACE_ARGS("waitcnt = %d\n", waitcnt);
- if (!ocfs_node_is_recovering(osb,
- lockres->master_node_num))
- break;
- ocfs_sleep(500);
- }
- }
-
- if (ocfs_task_interruptible ()) {
- ocfs_release_lockres_write (inode);
- LOG_TRACE_ARGS("interrupted... inode %llu\n",
- OCFS_I(inode)->ip_blkno);
- status = -EINTR;
- goto finally;
- }
- status = new_lock_function(osb, lock_type, flags|extra_lock_flags, *bh, inode);
-
- if (status < 0) {
- ocfs_release_lockres_write (inode); // ocfs_acquire_lock
- if (status == -EAGAIN || status == -ETIMEDOUT) {
- if (status == -ETIMEDOUT)
- LOG_ERROR_ARGS("Timed out acquiring lock for inode "
- "%llu, retrying...\n", OCFS_I(inode)->ip_blkno);
- ocfs_sleep (50);
- goto again;
- }
- goto finally;
- }
-
- /* We got the lock */
- status = 0;
-
-skip_lock_write:
- OCFS_ASSERT(status == 0);
- lockres->lock_holders++;
- if ((extra_lock_flags & FLAG_FAST_PATH_LOCK)
- && ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)))
- lockres->uncommitted_holders++;
- LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
- LOG_TRACE_ARGS("lockres->uncommitted_holders = %u\n",
- lockres->uncommitted_holders);
- ocfs_release_lockres_write (inode); // ocfs_acquire_lock
-
- if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
- ocfs2_dinode *fe = (ocfs2_dinode *) (*bh)->b_data;
- status = ocfs_refresh_inode(inode, fe);
- if (status < 0)
- LOG_ERROR_STATUS(status);
- status = 0;
- }
-finally:
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_acquire_lock */
-
-
-/*
- * ocfs_release_lock_full()
- * inode is definitely non NULL
- */
-int ocfs_release_lock_full (ocfs_super * osb, __u32 lock_type, __u32 flags, struct inode *inode, __u32 num_ident)
-{
- int status = 0;
- int vote_status = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- __u64 lock_id;
- __u32 num_to_send;
- ocfs_node_map votemap;
-
- LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
- osb, lock_type, flags, lockres);
-
- OCFS_ASSERT(inode);
- OCFS_ASSERT(num_ident);
-
- lock_id = OCFS_I(inode)->ip_blkno;
- LOG_TRACE_ARGS("lock_id = %llu", lock_id);
-
- flags |= FLAG_RELEASE_LOCK;
-
- ocfs_acquire_lockres_write(inode);
-
- if ((lockres->lock_type == OCFS_LKM_EXMODE) &&
- (lockres->master_node_num == osb->node_num) &&
- !(flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE))) {
- status = 0;
- goto finally;
- }
-
- if (flags & FLAG_READONLY) {
- if (lockres->lock_type != OCFS_LKM_EXMODE ||
- lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
- !(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state))) {
- LOG_ERROR_ARGS("READONLY release has issues! type=%d, master=%d, readonly=%s\n",
- lockres->lock_type, lockres->master_node_num,
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
- BUG();
- }
- status = 0;
- goto finally;
- }
-
- OCFS_ASSERT(lockres->uncommitted_holders <= lockres->lock_holders);
-
- num_to_send = num_ident;
- /* we don't want to send over a count for any size change
- * which includes acquires which we also didn't broadcast. */
- if ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)) {
- if (num_ident <= lockres->uncommitted_holders) {
- /* it breaks the rules to send zero or
- * negative lock releases! */
- num_to_send = 0;
- lockres->uncommitted_holders -= num_ident;
- status = 0;
- goto finally;
- }
- num_to_send -= lockres->uncommitted_holders;
- lockres->uncommitted_holders = 0;
- }
-
- OCFS_ASSERT(num_to_send);
-
- ocfs_node_map_dup(osb, &votemap, &osb->publ_map);
- ocfs_node_map_clear_bit(&votemap, osb->node_num);
- if (ocfs_node_map_is_empty(&votemap))
- goto finally;
-
- if (!(flags & FLAG_FILE_UPDATE_OIN))
- goto finally;
-
- status = -EAGAIN;
- while (status == -EAGAIN) {
- // remove dead nodes
- ocfs_node_map_and(&votemap, &osb->publ_map);
- if (ocfs_node_map_is_empty(&votemap)) {
- // last node in map died, so this node gets the lock
- status = 0;
- break;
- }
- status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags,
- &votemap, inode, num_to_send, &vote_status);
- if (status >= 0 || status == -EAGAIN) {
- if (status != -EAGAIN)
- status = vote_status;
-
- if (status >= 0) {
- break;
- } else if (status == -EAGAIN) {
- LOG_TRACE_ARGS ("EAGAIN on net vote, id=%llu\n", lock_id);
- continue;
- } else {
- LOG_ERROR_STATUS (status);
- break;
- }
- } else if (status == -ETIMEDOUT) {
- LOG_TRACE_ARGS ("ETIMEDOUT on net vote, id=%llu\n", lock_id);
- status = -EAGAIN;
-
- LOG_ERROR_ARGS("Timed out releasing lock for inode %llu, retrying...\n", OCFS_I(inode)->ip_blkno);
- ocfs_release_lockres_write(inode);
- ocfs_sleep(200);
- ocfs_acquire_lockres_write(inode);
- continue;
- } else
- LOG_ERROR_STATUS (status);
- }
-
-finally:
- if (lockres->lock_holders - num_ident < 0) {
- printk("About to decrement lock_holders one too many! lockid "
- "= %llu\n", lock_id);
- BUG();
- }
-#warning "is this wise, or shouldn't we be retrying the lock release later?"
- lockres->lock_holders -= num_ident;
- LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
-
- ocfs_release_lockres_write (inode);
- LOG_EXIT_STATUS (status);
- return (status);
-} /* ocfs_release_lock_full */
-
-/* inode is definitely non NULL */
-int new_lock_function(ocfs_super * osb, __u32 requested_lock, __u32 flags, struct buffer_head *bh, struct inode *inode)
-{
- ocfs_node_map vote_map;
- ocfs2_dinode *fe = NULL;
- __u64 lock_id;
- __u32 lock_type = requested_lock;
- int need_to_zap_buffers = 0, need_lock_write = 1;
- int is_readonly = (flags & FLAG_READONLY) ? 1 : 0;
- int status = 0, vote_status = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY ();
-
- lock_id = OCFS_I(inode)->ip_blkno;
-
- if (flags & FLAG_READONLY) {
- if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
- /* not currently readonly. treat like normal change master. */
- flags &= ~FLAG_READONLY;
- }
- } else if (flags & FLAG_CHANGE_MASTER) {
- /* non-readonly with CHANGE_MASTER should have no readonly flag */
- if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
- LOG_ERROR_ARGS("change_master but currently readonly\n");
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- }
- }
- if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
- /* on a master change... */
- need_to_zap_buffers = 1; /* need to dump local buffers */
- need_lock_write = 1; /* and rewrite the lock */
- } else if (flags & (FLAG_DROP_READONLY | FLAG_TRUNCATE_PAGES)) {
- need_lock_write = 0;
- need_to_zap_buffers = 0;
- } else if (flags & FLAG_READONLY) {
- need_lock_write = 0;
- need_to_zap_buffers = 1;
- } else if (!bh) {
- need_lock_write = 0;
- need_to_zap_buffers = 0;
- } else {
- fe = (ocfs2_dinode *) bh->b_data;
- /* may not need to rewrite the lock later if
- * we already have a cachelock */
- if ((DISK_LOCK(fe)->dl_master == osb->node_num)
- && (DISK_LOCK(fe)->dl_level != requested_lock))
- need_lock_write = 1;
- else
- need_lock_write = 0;
- need_to_zap_buffers = 0;
- }
-
- /* that's why it's called fast path */
- if (flags & FLAG_FAST_PATH_LOCK)
- goto vote_success;
-
-
-#define BROADCAST_FLAGS (FLAG_FILE_DELETE | FLAG_FILE_RENAME | FLAG_RELEASE_DENTRY | FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE | FLAG_FILE_UPDATE_OIN | FLAG_TRUNCATE_PAGES | FLAG_DROP_READONLY | FLAG_REMASTER)
-
- /* figure out who to vote with */
- if (flags & BROADCAST_FLAGS) {
- ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
- /* only nodes that see this is readonly */
- if (flags & FLAG_DROP_READONLY)
- ocfs_node_map_and(&vote_map, &lockres->readonly_map);
- } else {
- ocfs_node_map_init(osb, &vote_map);
- ocfs_node_map_set_bit(&vote_map, lockres->master_node_num); /* just owner */
- lock_type = lockres->lock_type;
- }
- ocfs_node_map_clear_bit(&vote_map, osb->node_num);
-
- // remove dead nodes
- ocfs_node_map_and(&vote_map, &osb->publ_map);
-
- if (ocfs_node_map_is_empty(&vote_map)) {
- /* As this is the only node alive, make it master of the lock */
- goto vote_success;
- }
-
- status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type,
- flags, &vote_map,
- inode, 1,
- &vote_status);
- if (status >= 0) {
- status = vote_status;
- }
-
- if (status < 0) {
- if (status != -EAGAIN &&
- status != -ETIMEDOUT &&
- status != -EINTR &&
- status != -EBUSY)
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
-vote_success:
- if (need_to_zap_buffers)
- ocfs_inc_inode_seq(osb, inode);
-
- /* just alerting owner on open */
- if (flags & FLAG_TRUNCATE_PAGES)
- goto bail;
-
- /* converted EX to readonly EX */
- if (flags & FLAG_READONLY)
- goto bail;
-
- /* drop readonly should remove anyone who has responded */
- if (flags & FLAG_DROP_READONLY) {
- ocfs_node_map_clear_bits(&lockres->readonly_map, &vote_map);
- goto bail;
- }
-
- /* update the disk lock */
- if (need_lock_write) {
- lockres->lock_type = requested_lock;
- lockres->master_node_num = osb->node_num;
- if (!bh) {
- printk("We're trying to write a lock but I wasn't "
- "passed a buffer: inode %llu, flags %u\n",
- OCFS_I(inode)->ip_blkno, flags);
- BUG();
- }
-
- /* want to refresh the lock from the latest on disk
- * state before writing it back out. */
- status = ocfs_read_block(osb, lock_id, &bh, 0, inode);
- if (!status)
- ocfs_update_disk_lock(osb, bh, inode);
-
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
- atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
- }
-
-bail:
- /* if we removed FLAG_READONLY above, or converted an
- * EX to readonly, set the readonly state now */
- if (status >= 0 && (is_readonly || flags & FLAG_READONLY)) {
- set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- }
-
- LOG_EXIT_STATUS (status);
- return status;
-}
-
-void ocfs_compute_dlm_stats(int status, int vote_status, ocfs_dlm_stats *stats)
-{
- atomic_inc (&stats->total);
- if (status == -ETIMEDOUT)
- atomic_inc (&stats->etimedout);
- else {
- switch (vote_status) {
- case -EAGAIN:
- case FLAG_VOTE_UPDATE_RETRY:
- atomic_inc (&stats->eagain);
- break;
- case -ENOENT:
- case FLAG_VOTE_FILE_DEL:
- atomic_inc (&stats->enoent);
- break;
- case -EBUSY:
- case -ENETUNREACH:
- case FLAG_VOTE_OIN_ALREADY_INUSE:
- atomic_inc (&stats->efail);
- break;
- case 0:
- case FLAG_VOTE_NODE:
- case FLAG_VOTE_OIN_UPDATED:
- atomic_inc (&stats->okay);
- break;
- default:
- atomic_inc (&stats->def);
- break;
- }
- }
-}
Deleted: branches/dlm-glue/src/dlm.h
===================================================================
--- branches/dlm-glue/src/dlm.h 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/dlm.h 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,78 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlm.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_DLM_H
-#define OCFS2_DLM_H
-
-int new_lock_function(ocfs_super *osb, __u32 requested_lock,
- __u32 flags, struct buffer_head *bh,
- struct inode *inode);
-int ocfs_acquire_lock(ocfs_super *osb, __u32 lock_type,
- __u32 flags, struct buffer_head **bh,
- struct inode *inode);
-void ocfs_compute_dlm_stats(int status, int vote_status,
- ocfs_dlm_stats *stats);
-#define ocfs_release_lock(osb, lock_type, flags, inode) \
- ocfs_release_lock_full(osb, lock_type, flags, inode, 1)
-int ocfs_release_lock_full(ocfs_super *osb, __u32 lock_type,
- __u32 flags, struct inode *inode, __u32 num_ident);
-
-#define ocfs_acquire_lock_ro(osb, inode) \
-({ \
- int status; \
- struct buffer_head *junkbh = NULL;\
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_READONLY, &junkbh, inode);\
- if (junkbh) \
- brelse(junkbh);\
- (status); \
- })
-
-#define ocfs_release_lock_ro(osb, inode) ocfs_release_lock(osb, OCFS_LKM_EXMODE, FLAG_READONLY, inode)
-
-
-void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
- ocfs_node_map *vote_map);
-int ocfs_notify_cluster(ocfs_super *osb,
- struct inode *inode,
- u32 message_flags);
-static inline int ocfs_notify_on_rename(ocfs_super *osb, struct inode *inode)
-{
- /* whatcha tryin' to do to us! */
- OCFS_ASSERT(!S_ISDIR(inode->i_mode));
-
- return(ocfs_notify_cluster(osb,
- inode,
- FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME));
-}
-static inline int ocfs_notify_on_open(ocfs_super *osb, struct inode *inode)
-{
- return(ocfs_notify_cluster(osb,
- inode,
- FLAG_TRUNCATE_PAGES));
-}
-void ocfs_update_disk_lock (ocfs_super * osb,
- struct buffer_head *bh,
- struct inode *inode);
-#endif /* OCFS2_DLM_H */
Deleted: branches/dlm-glue/src/nm.c
===================================================================
--- branches/dlm-glue/src/nm.c 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/nm.c 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,1150 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * nm.c
- *
- * net and disk process vote, nm thread, etc.
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ocfs_compat.h"
-
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/bitops.h>
-#include <linux/net.h>
-
-#include "ocfs_log.h"
-#include "ocfs.h"
-#include "ocfs2.h"
-
-#include "alloc.h"
-#include "dlm.h"
-#include "extent_map.h"
-#include "file.h"
-#include "heartbeat.h"
-#include "inode.h"
-#include "lockres.h"
-#include "nm.h"
-#include "util.h"
-#include "vote.h"
-
-#include "ocfs_journal.h"
-#include "buffer_head_io.h"
-
-/* Tracing */
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NM
-
-/* for lack of a better name... protects inode_private->ip_num_extends. */
-static spinlock_t oin_num_ext_lock = SPIN_LOCK_UNLOCKED;
-struct semaphore recovery_list_sem;
-
-static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags);
-static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num,
- __u32 flags, int *master_alive, int *write_lock,
- int *change_master, struct inode *inode);
-
-static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, struct inode *inode);
-static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode, ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id);
-static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq);
-static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres);
-
-static int _ocfs_drop_readonly_cache_lock(void *arg);
-
-typedef struct _ocfs_ro_cache_drop_ctxt
-{
- ocfs_super *osb;
- ocfs_lock_res *lockres;
- struct inode *inode;
- int yield;
-} ocfs_ro_cache_drop_ctxt;
-
-static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num);
-static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num, u32 num_rel);
-
-static int ocfs_process_inode_delete(struct inode *inode);
-static void ocfs_commit_inode_delete(struct inode *inode);
-
-static const char *process_vote_strings[] = {
- "INVALID_REQUEST", // reply with a NO vote
- "UPDATE_OIN_INODE", // update both oin and inode
- "DELETE_ACQUIRE",// delete or rename request
- "CHANGE_MASTER", // request to change master to requestor
- "NOT_MASTER", // I am not master, retry
- "REMASTER_THIS", // remaster lock to me
- "REMASTER_REQUESTOR", // remaster lock to requestor
- "DROP_READONLY", // RO cachelock needs to convert to RW
- "READONLY",
- "RELEASE_DENTRY",
- "TRUNCATE_PAGES"
-};
-
-/*
- * ocfs_recv_thread()
- *
- */
-int ocfs_recv_thread (void *unused)
-{
- int status = 0;
- ocfs_recv_ctxt *recv_ctxt = NULL;
-
- LOG_ENTRY ();
-
-#define LISTENER_PROCESS_NAME "ocfs2lsnr"
- ocfs_daemonize (LISTENER_PROCESS_NAME, strlen(LISTENER_PROCESS_NAME),
- 1);
-
- OcfsIpcCtxt.task = current;
-
- while (1) {
- recv_ctxt = ocfs_malloc (sizeof (ocfs_recv_ctxt));
- if (recv_ctxt == NULL) {
- LOG_ERROR_STATUS (-ENOMEM);
- goto bail;
- }
-
- memset (recv_ctxt, 0, sizeof (ocfs_recv_ctxt));
- recv_ctxt->msg_len = OCFS_MAX_DLM_PKT_SIZE;
-
- status = ocfs_recv_udp_msg (recv_ctxt);
- if (status < 0) {
- kfree(recv_ctxt);
- if (status != -EBADF) {
- LOG_ERROR_STATUS (status);
- } else {
- /* Thread is being killed. */
- goto finally;
- }
- }
- }
-
-finally:
- /* Flush all scheduled tasks */
- flush_scheduled_work();
-
- if (OcfsIpcCtxt.send_sock) {
- sock_release (OcfsIpcCtxt.send_sock);
- OcfsIpcCtxt.send_sock = NULL;
- }
-
- if (OcfsIpcCtxt.recv_sock) {
- sock_release (OcfsIpcCtxt.recv_sock);
- OcfsIpcCtxt.recv_sock = NULL;
- }
-
- OcfsIpcCtxt.task = NULL;
-
- /* signal main thread of ipcdlm's exit */
- complete (&(OcfsIpcCtxt.complete));
-
-bail:
- LOG_EXIT ();
- return 0;
-} /* ocfs_recv_thread */
-
-// gets a best guess (based on dirty read of lockres)
-// of whether down_read or down_write should be used on lockres
-// NOTE: always RECHECK after getting the lock and follow what
-// get_process_vote_action says
-static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags)
-{
- // always need write access to lockres if not master
- if (lockres->master_node_num != osb->node_num)
- return 1;
- // usually need write access for these so just get it
- if (flags & (FLAG_CHANGE_MASTER|FLAG_DROP_READONLY|FLAG_READONLY))
- return 1;
- // nothing else will need it, assuming it didnt just change under us
- return 0;
-}
-
-static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num,
- __u32 flags, int *master_alive, int *write_lock,
- int *change_master, struct inode *inode)
-{
- int vote_type = INVALID_REQUEST;
- int my_node_wins = 0;
- int this_node_master = 0;
- __u64 lockid = 0;
- ocfs_vote_obj_lookup_data data;
-
- LOG_ENTRY_ARGS("(node_num=%d, flags=%08x)\n", node_num, flags);
-
- OCFS_ASSERT(inode);
- OCFS_ASSERT(lockres);
-
- lockid = OCFS_I(inode)->ip_blkno;
-
- *change_master = 0;
- *write_lock = 0;
- this_node_master = (lockres->master_node_num == osb->node_num);
- *master_alive = (lockres->master_node_num != OCFS_INVALID_NODE_NUM) &&
- ocfs_node_is_alive(&osb->publ_map, lockres->master_node_num);
-
- // if an outstanding vote request is found on this lockid
- // and this node number is higher, this node wins
- data.func = ocfs_lookup_obj_by_lockid;
- data.u.s.lock_id = lockid;
- data.ret = NULL;
- if (ocfs_lookup_vote_request_obj(osb, &data) == 0)
- my_node_wins = (node_num < osb->node_num);
-
- /* NOTE: FLAG_CHANGE_MASTER may be combined with
- * other flags and result in a process_vote action
- * other than CHANGE_MASTER. the change_master
- * value returned here is independent of this action */
- if (this_node_master && flags & FLAG_CHANGE_MASTER) {
- *write_lock = 1;
- *change_master = 1;
- }
-
- // if this node is not master, we will need to update the lockres
- if (!this_node_master)
- *write_lock = 1;
-
- if (flags & (FLAG_RELEASE_DENTRY | FLAG_FILE_RENAME)) {
- vote_type = RELEASE_DENTRY;
- goto done;
- }
-
- if (flags & FLAG_DROP_READONLY) {
- vote_type = DROP_READONLY;
- *write_lock = 1;
- goto done;
- } else if (flags & FLAG_READONLY) {
- if (this_node_master && lockres->lock_type == OCFS_LKM_EXMODE) {
- vote_type = READONLY;
- *write_lock = 1;
- } else
- vote_type = INVALID_REQUEST;
- goto done;
- }
-
- if (flags & FLAG_FILE_DELETE) {
- if (flags & FLAG_RELEASE_LOCK)
- vote_type = INVALID_REQUEST;
- else if (flags & FLAG_ACQUIRE_LOCK)
- vote_type = DELETE_ACQUIRE;
- else
- vote_type = INVALID_REQUEST;
- } else if (flags & FLAG_FILE_UPDATE_OIN) {
- if ((flags & FLAG_FILE_TRUNCATE) &&
- (flags & FLAG_ACQUIRE_LOCK))
- vote_type = TRUNCATE_PAGES;
- else
- vote_type = UPDATE_OIN_INODE;
- } else if (flags & FLAG_TRUNCATE_PAGES) {
- vote_type = TRUNCATE_PAGES;
- } else if (this_node_master) {
- if (flags & FLAG_CHANGE_MASTER)
- vote_type = CHANGE_MASTER;
- else {
- LOG_TRACE_STR("(INVALID_REQUEST) am master, but no more types");
- vote_type = INVALID_REQUEST;
- }
- } else {
- if (*master_alive)
- vote_type = NOT_MASTER;
- else if (my_node_wins)
- vote_type = REMASTER_THIS;
- else
- vote_type = REMASTER_REQUESTOR;
- }
-
-done:
- LOG_EXIT_STATUS(vote_type);
- return vote_type;
-}
-
-/* this function requires that callers to it be serialized (isn't
- * really a problem as vote_sem does that for us. */
-static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num)
-{
- spin_lock(&oin_num_ext_lock);
-
- if (OCFS_I(inode)->ip_num_extends < 0)
- BUG();
-
- /* this isn't the 1st extend against the inode, so just inc
- * the counter. */
- if (OCFS_I(inode)->ip_num_extends > 0) {
- OCFS_I(inode)->ip_num_extends++;
-
- printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
- OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-
- spin_unlock(&oin_num_ext_lock);
- return;
- }
-
- /* ok, we're going to have to take the extend sem. We can't do
- * this holding ip_node_extend_sem so we drop it and recheck after
- * we've got it. */
- spin_unlock(&oin_num_ext_lock);
-
- /* take the extend_sem on behalf of
- * this other node. It won't be
- * released until he does his last
- * release broadcast. This has the
- * effect of locking out
- * ocfs2_extent_map lookups
- * inode. */
- down_write(&OCFS_I(inode)->ip_node_extend_sem);
-
- atomic_inc(&inode->i_count);
-
- /* Ok, we've still got it open. Put this guy on the recovery
- * list in case the extending node dies. */
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
-
- if (OCFS_I(inode)->ip_num_extends < 0)
- BUG();
-
- OCFS_I(inode)->ip_num_extends++;
- list_add_tail(&OCFS_I(inode)->ip_recovery_list,
- &osb->lock_recovery_lists[node_num]);
-
- LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
- OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-
- spin_unlock(&oin_num_ext_lock);
- up(&recovery_list_sem);
-
- return;
-}
-
-static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num, u32 num_rel)
-{
- int dec = 0;
-
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
-
- if ((OCFS_I(inode)->ip_num_extends - (s32) num_rel) < 0) {
- /* We don't force to zero here in order to cover up a
- * bug, but rather because it's perfectly valid for us
- * to get a release with a count > what we've had if
- * we mount after the acquires have been sent. */
-
- LOG_TRACE_PROCESS_VOTE("inode %llu, num_rel of "
- "%d would result in negative count (ip_num_extends "
- "= %d)\n",
- OCFS_I(inode)->ip_blkno, num_rel,
- OCFS_I(inode)->ip_num_extends);
- OCFS_I(inode)->ip_num_extends = 0;
- } else {
- OCFS_I(inode)->ip_num_extends -= num_rel;
- }
-
- LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
- OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-
- if (!OCFS_I(inode)->ip_num_extends) {
- list_del(&OCFS_I(inode)->ip_recovery_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
-
- up_write(&OCFS_I(inode)->ip_node_extend_sem);
-
- dec = 1;
- }
-
- spin_unlock(&oin_num_ext_lock);
- up(&recovery_list_sem);
-
- /* we want iputs to happen outside of as many locks as possible. */
- if (dec)
- iput(inode);
-
- return;
-}
-
-
-static int ocfs_process_inode_delete(struct inode *inode)
-{
- int status;
-
- LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
- "lnk_cnt = %u\n", inode->i_ino,
- inode->i_nlink);
-
- /* force this as ours may be out of date. */
- inode->i_nlink = 0;
-
- spin_lock(&OCFS_I(inode)->ip_lock);
- /* vote no if the file is still open. */
- if (OCFS_I(inode)->ip_open_cnt > 0) {
- LOG_TRACE_PROCESS_VOTE("open count = %u\n",
- OCFS_I(inode)->ip_open_cnt);
- spin_unlock(&OCFS_I(inode)->ip_lock);
- status = 0;
- goto done;
- }
- spin_unlock(&OCFS_I(inode)->ip_lock);
-
- /* vote no if someone's extending it. */
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->ip_num_extends) {
- spin_unlock(&oin_num_ext_lock);
- LOG_TRACE_PROCESS_VOTE("extends pending\n");
- status = 0;
- goto done;
- }
- spin_unlock(&oin_num_ext_lock);
-
- /* directories are a bit ugly... What if someone is sitting in
- * it? We want to make sure the inode is removed completely as
- * a result of the iput in process_vote. */
- if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
- LOG_TRACE_PROCESS_VOTE("i_count = %u\n",
- atomic_read(&inode->i_count));
- status = 0;
- goto done;
- }
-
- status = 1;
-done:
- return(status);
-}
-
-static void ocfs_commit_inode_delete(struct inode *inode)
-{
- SET_INODE_DELETED(inode);
- /* if we vote yes, then we set the SKIP_DELETE
- * flag on the inode so we don't try to delete
- * it in delete_inode ourselves. */
- OCFS_SET_FLAG(OCFS_I(inode)->ip_flags,
- OCFS_INODE_SKIP_DELETE);
-
- d_prune_aliases (inode);
- sync_mapping_buffers(inode->i_mapping);
- ocfs_truncate_inode_pages(inode, 0);
- ocfs2_extent_map_drop(inode, 0);
-}
-
-
-/*
- * ocfs_process_vote()
- *
- * @osb:
- * @publish:
- * @node_num: node asking for the vote
- *
- */
-int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg)
-{
- int status = 0;
- int tmpstat = 0;
- ocfs_lock_res *lockres = NULL;
- __u32 flags, num_ident;
- __u16 num_nodes;
- int vote_type = INVALID_REQUEST, vote_response = 0;
- struct inode *inode = NULL;
- int master_alive = 1, change_master = 0, write_lock = 0;
- int inc_inode_seq = 0;
- int change_master_succeeded = 0;
- __s16 node_num = dlm_msg->src_node;
- __u64 lock_id, seq_num;
- ocfs_dlm_req_master *req_master = NULL;
- int lockres_lock_held = NO_LOCK;
-
- LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, dlm_msg);
-
- down(&osb->vote_sem);
-
- num_nodes = osb->max_nodes;
-
- req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
- flags = req_master->flags;
- lock_id = req_master->lock_id;
- seq_num = req_master->lock_seq_num;
- num_ident = req_master->num_ident;
-
- if (!num_ident) {
- printk("flags = 0x%x, lock_id = %llu, node_num = %u\n",
- flags, lock_id, node_num);
- BUG();
- }
-
- LOG_TRACE_ARGS ("node=%u, id=%llu, seq=%llu\n", node_num,
- lock_id, seq_num);
-
- /* if we timeout on any of the locks, we want to send a retry
- * instead of letting the other guy's network timeout. */
- vote_response = FLAG_VOTE_UPDATE_RETRY;
-
- if (flags & FLAG_TRUNCATE_PAGES) {
- inode = ocfs_ilookup(osb, lock_id);
- if(!inode) {
- vote_type = TRUNCATE_PAGES;
- goto got_vote_type;
- }
- } else {
- inode = ocfs_iget(osb, lock_id);
- }
-
- if (!inode) {
- status = -ENOENT;
- LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
- "node=%u, seq=%llu, flags=0x%x\n",
- lock_id, node_num, seq_num, flags);
- LOG_ERROR_STATUS(status);
- goto vote;
- }
-
- /* ahh, so you find yourself asking "what the
- * heck is this?"
- * Please see the note in ocfs_delete_inode. */
- osb->voting_ino = inode->i_ino;
-
- lockres = GET_INODE_LOCKRES(inode);
-
- // take a good guess...
- // at worst, we will take 2 passes through
- write_lock = need_write_lock(osb, lockres, flags);
-
-retake_lock:
- OCFS_ASSERT(lockres_lock_held == NO_LOCK);
- if (write_lock)
- status = ocfs_acquire_lockres_write_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
- else
- status = ocfs_acquire_lockres_read_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
-
- if (status < 0) {
- LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
- OCFS_I(inode)->ip_blkno);
- goto vote;
- } else
- lockres_lock_held = (write_lock ? WRITE_LOCK : READ_LOCK);
-
- // find out everything now that a lock is held
- vote_type = get_process_vote_action(osb, lockres, node_num, flags,
- &master_alive, &write_lock,
- &change_master, inode);
-
- // bummer. we got the wrong lock. get the write lock and start over.
- if (write_lock && lockres_lock_held == READ_LOCK) {
- ocfs_release_lockres_read(inode);
- lockres_lock_held = NO_LOCK;
- goto retake_lock;
- }
-
- if (lockres->master_node_num != osb->node_num) {
- /* since we pass a NULL bh, this'll only do a read if
- * we're not the master. */
- OCFS_ASSERT(lockres_lock_held == WRITE_LOCK);
- status = ocfs_update_lockres (osb, NULL, inode, 1);
-
- if (status < 0) {
- if (status != -ETIMEDOUT)
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- }
-
-got_vote_type:
-
- LOG_TRACE_PROCESS_VOTE("type: %s, lockid: %llu, action: (%u) %s, num_ident: %u, "
- "alive: %d, write: %d, change: %d, held: %d\n",
- flags & FLAG_RELEASE_LOCK ? "RELEASE" :
- (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
- vote_type, process_vote_strings[vote_type], num_ident,
- master_alive, write_lock, change_master, lockres_lock_held);
-
- if (vote_type == INVALID_REQUEST)
- printk("Invalid request! flags = 0x%x master=%d, readonly=%s\n",
- flags, lockres->master_node_num,
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
-
- /* get_process_vote_action will only allow CHANGE_MASTER on a CACHE lock
- * held by this node. the CHANGE_MASTER path needs to check the readonly
- * map to see if any nodes need to be updated. */
-
- vote_response = FLAG_VOTE_NODE;
-
- if (ocfs_process_vote_pre_change_master(vote_type, flags, &vote_response, inode))
- goto vote;
-
- if (change_master) {
- tmpstat = ocfs_process_vote_change_master(osb, &vote_response, &status,
- inode, lockres, node_num, lock_id);
- if (tmpstat < 0)
- goto leave;
- else if (tmpstat == 1)
- goto vote;
- change_master_succeeded = 1;
- inc_inode_seq = 1;
- }
-
- tmpstat = ocfs_process_vote_post_change_master(osb, vote_type, flags, &vote_response, inode,
- lockres, &status, node_num, &inc_inode_seq);
-
- /* if we made it this far, and change_master, then it had better be voting yes */
- if (change_master && vote_response != FLAG_VOTE_NODE)
- BUG();
-
- if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) &&
- ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
- (flags & FLAG_RELEASE_LOCK))) {
- LOG_TRACE_ARGS("responding YES to %s %s request, inode=%p, node=%u\n", flags & FLAG_FILE_EXTEND ?
- "extend" : "truncate", flags & FLAG_RELEASE_LOCK ?
- "release" : "acquire", inode, node_num);
-
- if (flags & FLAG_ACQUIRE_LOCK)
- ocfs_mark_inode_for_extend(osb, inode, node_num);
- else if (flags & FLAG_RELEASE_LOCK)
- ocfs_clear_inode_for_extend(osb, inode, node_num,
- num_ident);
- else {
- printk("uhoh, bad vote flags! 0x%x\n", flags);
- BUG();
- }
- }
-
-vote:
- status = ocfs_send_vote_reply(osb, dlm_msg, vote_response);
-
- LOG_TRACE_PROCESS_VOTE("vote: lockid=%llu, node=%d, seqnum=%llu, response=%d\n",
- lock_id, node_num, seq_num, vote_response);
-
- if (status < 0)
- LOG_ERROR_STATUS (status);
- else {
- ocfs_compute_dlm_stats (0, vote_response,
- &(OcfsGlobalCtxt.net_reply_stats));
- ocfs_compute_dlm_stats (0, vote_response,
- &(osb->net_reply_stats));
- }
-
-leave:
- if (lockres_lock_held == READ_LOCK)
- ocfs_release_lockres_read (inode);
- else if (lockres_lock_held == WRITE_LOCK)
- ocfs_release_lockres_write (inode);
- lockres_lock_held = NO_LOCK;
-
- if (!inode)
- goto no_inode_leave;
-
- if (inc_inode_seq) {
- ocfs_inc_inode_seq(osb, inode);
- sync_mapping_buffers(inode->i_mapping);
- }
- iput(inode);
-
-no_inode_leave:
- osb->voting_ino = 0;
-
- up(&osb->vote_sem);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_process_vote */
-
-
-/* some lock requests need to be processed before a possible
- * change master. Beware however that the change_master might
- * very well send a no vote, so you can't do things here that
- * cannot be rolled back. */
-
-/* Returns: 1 if process_vote should vote immediately, 0 otherwise */
-
-static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, struct inode *inode)
-{
- if (vote_type == DELETE_ACQUIRE) {
- LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
- if (!ocfs_process_inode_delete(inode)) {
- *vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
- return 1;
- }
- *vote_response = FLAG_VOTE_NODE;
- return 0;
- }
- if (vote_type == TRUNCATE_PAGES) {
- LOG_TRACE_STR("TRUNCATE_PAGES");
- *vote_response = FLAG_VOTE_NODE;
- if (inode) {
- if (ocfs_sync_inode(inode) < 0) {
- LOG_ERROR_ARGS("sync inode failed for inode %lu!\n", inode->i_ino);
- BUG();
- }
- ocfs_truncate_inode_pages(inode, 0);
- spin_lock(&OCFS_I(inode)->ip_lock);
-
- /* truncate may send this */
- if (flags & FLAG_FILE_UPDATE_OIN)
- atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
- spin_unlock(&OCFS_I(inode)->ip_lock);
-
- /* Do we need this? */
- ocfs2_extent_map_drop(inode, 0);
- }
- return 0;
- }
-
- *vote_response = 0;
- if (vote_type == INVALID_REQUEST) {
- /* we catch INVALID_REQUEST up here now as we
- * don't want to do a change_master on a
- * messed up vote... */
- LOG_TRACE_STR("INVALID_REQUEST");
- return 1;
- }
- return 0;
-}
-
-
-
-
-static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres)
-{
- /* requestor will need to retry if anyone is using the lockres */
- if (lockres->lock_holders > 0) {
- LOG_TRACE_PROCESS_VOTE("Lock id (%llu) has %u holders\n",
- OCFS_I(inode)->ip_blkno, lockres->lock_holders);
- // kick the commit thread
- atomic_set(&osb->flush_event_woken, 1);
- wake_up(&osb->flush_event);
-
- return 1;
- }
- return 0;
-}
-
-
-/* Returns: <0 if an I/O error occurred,
- * 1 if process_vote should vote immediately,
- * 0 if change master succeeded */
-
-static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode,
- ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id)
-{
- struct buffer_head *fe_bh = NULL;
-
- /* lockres is held with down_write throughout this call */
-
- LOG_TRACE_STR("CHANGE_MASTER");
- LOG_TRACE_PROCESS_VOTE("doing CHANGE_MASTER for this request\n");
-
- if (ocfs_lock_busy(osb, inode, lockres)) {
- *vote_response = FLAG_VOTE_UPDATE_RETRY;
- *status = 0;
- return 1;
- }
-
- /* this is currently a readonly EX lock.
- * need to communicate to all the nodes in the
- * map that lock will be changing to RW before we
- * continue. RETRY this request while we spawn
- * off a thread to collect up the communication */
- if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- // assumption: node asking for vote has already dropped readonly
- ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
- // should not be in there, but...
- ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
- if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- OCFS_ASSERT(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
- lockres->master_node_num == osb->node_num);
- OCFS_ASSERT(inode);
- *status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
- if (*status < 0)
- LOG_ERROR_STATUS(*status);
- LOG_TRACE_PROCESS_VOTE("node map not empty on RO drop request\n");
- *vote_response = FLAG_VOTE_UPDATE_RETRY;
- // did not change master, send response
- return 1;
- }
- // noone left in map, so continue
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- }
-
- sync_mapping_buffers(inode->i_mapping);
-
-#warning do we need a truncate_inode_pages here?
- if (lockres->master_node_num != osb->node_num) {
- printk("are we giving away a lock we don't own!?! "
- "inode %llu\n", OCFS_I(inode)->ip_blkno);
- BUG();
- }
-
- *status = ocfs_read_block(osb, lock_id, &fe_bh, OCFS_BH_CACHED, inode);
- if (*status < 0) {
- LOG_ERROR_STATUS ((*status));
- return *status;
- }
- lockres->master_node_num = node_num;
- lockres->lock_type = OCFS_LKM_NLMODE;
- ocfs_update_disk_lock(osb, fe_bh, inode);
- brelse(fe_bh);
- *vote_response = FLAG_VOTE_NODE;
- *status = 0;
-
- // master successfully changed
- return 0;
-}
-
-
-
-/* Returns: 1 if process_vote should vote immediately,
- * 0 on success */
-
-/* we can't have any of these cases failing if the change master already succeeded */
-static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq)
-{
- switch (vote_type) {
- case TRUNCATE_PAGES:
- case CHANGE_MASTER:
- /* we dealt with this all above. */
- break;
-
- case UPDATE_OIN_INODE:
- LOG_TRACE_STR("UPDATE_OIN_INODE");
- atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
- *vote_response = FLAG_VOTE_OIN_UPDATED;
- break;
-
- case RELEASE_DENTRY:
- OCFS_ASSERT(inode);
-
- /* we always vote yes on this one. */
- *vote_response = FLAG_VOTE_NODE;
-
- /* do nothing in the release case... hmm,
- * perhaps we should just do a verify_update
- * or something in case the guy aborted... */
- if (flags & FLAG_RELEASE_LOCK)
- break;
-
- d_prune_aliases (inode);
-
- /* for rename, we don't drop link counts */
- if (!(flags & FLAG_FILE_RENAME)) {
- if (S_ISDIR(inode->i_mode))
- inode->i_nlink = 0;
- else
- inode->i_nlink--;
- }
-
- LOG_TRACE_ARGS("pruned dentries for inode %lu, nlink = %u\n",
- inode->i_ino, inode->i_nlink);
- break;
-
- case DELETE_ACQUIRE:
- LOG_TRACE_STR("DELETE_ACQUIRE (part two)");
- /* If we got this far, then we assume we've
- * done the 1st part of the DELETE_ACQUIRE
- * case and we just have to commit it. */
- if (*vote_response != FLAG_VOTE_NODE)
- BUG();
-
- ocfs_commit_inode_delete(inode);
- break;
-
- case READONLY:
- LOG_TRACE_STR("READONLY");
- // WRITELOCK
- OCFS_ASSERT(!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
- lockres->master_node_num == osb->node_num);
-
- if (ocfs_lock_busy(osb, inode, lockres)) {
- *vote_response = FLAG_VOTE_UPDATE_RETRY;
- *status = 0;
- return 1;
- }
-
- // if the requestor just wants to do readonly, we
- // drop our buffers, so switch to readonly and done
- sync_mapping_buffers(inode->i_mapping);
-
- ocfs_node_map_set_bit(&lockres->readonly_map, node_num);
- set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- *vote_response = FLAG_VOTE_NODE;
- *status = 0;
- break;
-
- case DROP_READONLY:
- /* TODO: may need locking in here to lock out
- * the actual IO that a readdir may have in
- * progress, if it's possible to have a corrupt
- * readdir. for now, skip it.
- * NOTE: can't just take io_sem because lock order
- * needs to be io_sem->lockres... would have to
- * drop lockres, take io_sem, take lockres, then
- * recheck all the conditions to see if still
- * appropriate, then do the work and drop both.
- * seems like a lot of work. almost as many lines
- * of code as there are lines of comments right here.
- */
-
- /* this path should always succeed on the vote *
- * even in the error case. do nothing for error. */
-
- // WRITELOCK
- if (lockres->master_node_num != node_num ||
- lockres->lock_type != OCFS_LKM_EXMODE ||
- !ocfs_node_map_is_empty(&lockres->readonly_map))
- LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d readonly=%s\n",
- lockres->master_node_num, node_num, lockres->lock_type,
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
- else
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
-
- *status = 0;
- *vote_response = FLAG_VOTE_NODE;
- *inc_seq = 1;
- break;
-
- case NOT_MASTER:
- LOG_TRACE_STR("NOT_MASTER");
- *vote_response = FLAG_VOTE_UPDATE_RETRY;
- if (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE) &&
- lockres->master_node_num == node_num) {
- LOG_TRACE_STR("owner is requesting extend/truncate");
- *vote_response = FLAG_VOTE_NODE;
- }
- break;
-
- case REMASTER_THIS:
- LOG_TRACE_STR("REMASTER_THIS");
- *vote_response = FLAG_VOTE_UPDATE_RETRY;
- break;
-
- case REMASTER_REQUESTOR:
- LOG_TRACE_STR("REMASTER_REQUESTOR");
- *vote_response = FLAG_VOTE_NODE;
- break;
-
- case INVALID_REQUEST:
- default:
- LOG_TRACE_STR("INVALID_REQUEST");
- *vote_response = 0;
- break;
- }
- return 0;
-}
-
-
-/* inode is definitely non NULL */
-void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode)
-{
- atomic_t *seq = GET_INODE_CLEAN_SEQ(inode);
-
- LOG_TRACE_ARGS("incrementing inode seq... current is %d\n",
- atomic_read(seq));
-
- /* wrap to ONE after 13 bits, will need a spinlock */
- spin_lock (&osb->clean_buffer_lock);
- if ((atomic_read(&osb->clean_buffer_seq)+1) % STATE_BIT_MAX == 0)
- atomic_set(&osb->clean_buffer_seq, 1);
- else
- atomic_inc(&osb->clean_buffer_seq);
- spin_unlock (&osb->clean_buffer_lock);
-
- /* doesn't matter if this another process */
- /* has already incremented the global seq */
- atomic_set(seq, atomic_read(&osb->clean_buffer_seq));
-
- LOG_TRACE_ARGS("done incrementing inode seq... new is %d\n",
- atomic_read(seq));
-}
-
-
-void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num)
-{
- struct list_head *iter, *temp;
- struct inode *inode;
- ocfs_inode_private *i;
-
- LOG_ENTRY_ARGS("(node_num = %u)\n", node_num);
-
-start:
- down(&recovery_list_sem);
- list_for_each_safe (iter, temp, &osb->lock_recovery_lists[node_num]) {
- i = list_entry (iter, ocfs_inode_private, ip_recovery_list);
-
- inode = i->ip_inode;
- spin_lock(&oin_num_ext_lock);
-
- if (OCFS_I(inode)->ip_num_extends) {
- OCFS_I(inode)->ip_num_extends = 0;
- list_del(&OCFS_I(inode)->ip_recovery_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
- up_write(&OCFS_I(inode)->ip_node_extend_sem);
-
- spin_unlock(&oin_num_ext_lock);
- up (&recovery_list_sem);
- iput(inode);
- goto start;
- } else
- LOG_ERROR_STR("oin is in recovery list, but has zero extend counter value!");
-
- spin_unlock(&oin_num_ext_lock);
- }
-
- up (&recovery_list_sem);
-
- LOG_EXIT();
-}
-
-static int _ocfs_drop_readonly_cache_lock_thread(void *arg);
-
-/* inode is definitely non NULL */
-int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode, int yield)
-{
- ocfs_ro_cache_drop_ctxt *arg;
- int status = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
- // if coming from process_vote, go about our merry way
- if (yield)
- return 0;
- // if coming from acquire_lock, we are holding the
- // lockres and the rodrop thread needs it.
- // return -EAGAIN to drop and try again.
- return -EAGAIN;
- }
-
- arg = kmalloc(sizeof(ocfs_ro_cache_drop_ctxt), GFP_KERNEL);
- if (arg == NULL)
- return -ENOMEM;
-
- atomic_inc(&inode->i_count);
- arg->osb = osb;
- arg->lockres = lockres;
- arg->inode = inode;
- arg->yield = yield;
-
- if (yield)
- kernel_thread(_ocfs_drop_readonly_cache_lock_thread, arg,
- CLONE_VM | CLONE_FS | CLONE_FILES);
- else
- status = _ocfs_drop_readonly_cache_lock(arg);
-
- return status;
-}
-
-static int _ocfs_drop_readonly_cache_lock(void *arg)
-{
- ocfs_ro_cache_drop_ctxt *ctxt = arg;
- ocfs_super *osb = ctxt->osb;
- ocfs_lock_res *lockres = ctxt->lockres;
- struct inode *inode = ctxt->inode;
- int status = 0;
- int yield = ctxt->yield;
-
- /* this will wait until process_vote gets to the release */
- if (yield)
- ocfs_acquire_lockres_write(inode);
- /* check these under the lock */
- if (!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
- lockres->master_node_num != osb->node_num ||
- lockres->lock_type != OCFS_LKM_EXMODE) {
- LOG_ERROR_ARGS("inode %llu: bad RO lockres! this=%d, readonly=%s, master=%d, locktype=%u\n", OCFS_I(inode)->ip_blkno,
- osb->node_num,
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no",
- lockres->master_node_num, lockres->lock_type);
- status = -EINVAL;
- goto leave;
- }
-
- if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
- status = 0;
- goto leave;
- }
-
- set_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
- /* remove this node */
- ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
-
- status = 0;
- while (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- /* remove all dead nodes */
- ocfs_node_map_and(&lockres->readonly_map, &osb->publ_map);
- status = new_lock_function(osb, OCFS_LKM_EXMODE, FLAG_DROP_READONLY,
- NULL, inode);
- if (status == -EAGAIN) {
- status = 0;
- if (yield) {
- /* from nm thread, give some time to waiters */
- ocfs_release_lockres_write(inode);
- ocfs_sleep(50);
- ocfs_acquire_lockres_write(inode);
- }
- continue;
- }
- if (status < 0)
- LOG_ERROR_STATUS (status);
- break;
- }
-
- if (ocfs_node_map_is_empty(&lockres->readonly_map) &&
- test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
- lockres->master_node_num == osb->node_num)
- clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
-
- clear_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
-
-leave:
- if (yield)
- ocfs_release_lockres_write(inode); // ocfs_process_vote ocfs_acquire_lock
-
- if (inode)
- iput(inode);
- kfree(arg);
-
- return status;
-}
-
-static int _ocfs_drop_readonly_cache_lock_thread(void *arg)
-{
- int status = 0;
- siginfo_t info;
-
-#define OCFS_DROP_RO_THREAD_NAME "ocfs2dropro"
-
- ocfs_daemonize (OCFS_DROP_RO_THREAD_NAME,
- strlen(OCFS_DROP_RO_THREAD_NAME),
- 0);
- status = _ocfs_drop_readonly_cache_lock(arg);
-
- /* ignore the actual signal */
- if (signal_pending(current)) {
- dequeue_signal_lock(current, &current->blocked, &info);
- }
-
- /* Flush all scheduled tasks */
- flush_scheduled_work();
-
-#warning need a way to wait on all of these threads on dismount
-/*
- * The way to do this is to create a wait queue on the osb. When one of
- * these guys start, you bump a counter. When it ends, it decrements
- * the counter and wake_up()s the wait queue. The counter probably can
- * be protected by a spinlock on the OSB. The dismount handling just
- * waits on that wait queue until readonly_threads == 0.
- */
- return status;
-}
Deleted: branches/dlm-glue/src/nm.h
===================================================================
--- branches/dlm-glue/src/nm.h 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/nm.h 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,36 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * nm.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_NM_H
-#define OCFS2_NM_H
-
-int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode,
- int yield);
-void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode);
-int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg);
-int ocfs_recv_thread(void *unused);
-void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num);
-
-#endif /* OCFS2_NM_H */
Deleted: branches/dlm-glue/src/vote.c
===================================================================
--- branches/dlm-glue/src/vote.c 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/vote.c 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,1149 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * vote.c
- *
- * netdlm listener, receive, verify and send messages
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#include "ocfs_compat.h"
-
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/inet.h>
-#include <linux/net.h>
-#include <linux/in.h>
-
-#include <asm/uaccess.h>
-
-#include "ocfs_log.h"
-#include "ocfs.h"
-
-#include "dlm.h"
-#include "nm.h"
-#include "util.h"
-#include "vote.h"
-
-/* Tracing */
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOTE
-
-
-
-/* Global IPC context; this file uses its send_sock and recv_sock members. */
-ocfs_ipc_ctxt OcfsIpcCtxt;
-
-/*
- * One-letter tag per vote object state, indexed by obj->vote_state when
- * ocfs_lookup_obj_for_proc() formats its output.
- * NOTE(review): presumably U=UNSENT, S=SENT, P=PARTIAL_REPLY,
- * F=FULL_REPLY, D=DESTROYING - confirm against the VOTE_OBJ_STATE_* values.
- */
-static const char vote_state_str[] = { 'U', 'S', 'P', 'F', 'D' };
-
-/* Forward declarations for helpers defined later in this file. */
-static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map * votemap);
-static void ocfs_dlm_recv_msg (void *val);
-static int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len);
-static int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg);
-static int ocfs_comm_process_msg (__u8 * msg);
-static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type);
-
-static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg);
-static int ocfs_node_map_stringify(ocfs_node_map *map, char **str);
-
-/*
- * ocfs_put_vote_obj()
- *
- * Drop one reference on a vote object and free it once the last
- * reference is gone. A NULL obj is ignored (as kfree() does), so error
- * paths that never obtained an object can call this unconditionally.
- */
-static void ocfs_put_vote_obj(ocfs_vote_obj *obj)
-{
-	if (obj && atomic_dec_and_test(&obj->refcount))
-		kfree(obj);
-}
-
-/*
- * ocfs_get_vote_obj()
- *
- * Take one more reference on a vote object; pair with
- * ocfs_put_vote_obj().
- */
-static void ocfs_get_vote_obj(ocfs_vote_obj *obj)
-{
-	atomic_inc(&(obj->refcount));
-}
-
-
-/*
- * ocfs_recv_udp_msg()
- *
- * Receive one datagram from OcfsIpcCtxt.recv_sock into recv_ctxt->msg
- * (at most recv_ctxt->msg_len bytes) and schedule ocfs_dlm_recv_msg()
- * to process it.
- *
- * On success recv_ctxt->msg_len is updated to the number of bytes that
- * were actually received, which is the length ocfs_check_ipc_msg()
- * later validates.
- *
- * Returns 0 when a message was received and the work item scheduled,
- * -EBADF when the receive was interrupted (-ERESTARTSYS, logged as a
- * listener shutdown), and -ENETUNREACH for any other receive error or
- * when the datagram carried no source address.
- *
- * NOTE(review): ocfs_dlm_recv_msg() frees recv_ctxt; on a non-zero
- * return nothing here frees it, so presumably the caller does - confirm.
- */
-int ocfs_recv_udp_msg (ocfs_recv_ctxt * recv_ctxt)
-{
-	int status = -ENETUNREACH, error;
-	mm_segment_t oldfs;
-	struct sockaddr_in sin;
-	struct iovec iov = {
-		.iov_len = recv_ctxt->msg_len,
-		.iov_base = recv_ctxt->msg
-	};
-	struct msghdr msg = {
-		.msg_control = NULL,
-		.msg_controllen = 0,
-		.msg_iovlen = 1,
-		.msg_iov = &iov,
-		.msg_name = (struct sockaddr *) &sin,
-		.msg_namelen = sizeof (sin),
-		.msg_flags = 0
-	};
-
-	LOG_ENTRY ();
-
-	/* Initialize the workitem with our worker routine and Q it. */
-	INIT_WORK (&recv_ctxt->ipc_wq, ocfs_dlm_recv_msg, recv_ctxt);
-
-	memset (&sin, 0, sizeof (sin));
-	/* the buffer is a kernel address, so lift the user-space limit */
-	oldfs = get_fs ();
-	set_fs (get_ds ());
-	error = sock_recvmsg (OcfsIpcCtxt.recv_sock, &msg, recv_ctxt->msg_len, msg.msg_flags);
-	set_fs (oldfs);
-
-	if (error < 0) {
-		if (error == -ERESTARTSYS) {
-			status = -EBADF;
-			LOG_TRACE_STR ("Shutting down ocfs2lsnr");
-		} else {
-			status = -ENETUNREACH;
-			LOG_ERROR_ARGS ("unable to recvmsg, error=%d", error);
-			LOG_ERROR_STATUS (status);
-		}
-		goto bail;
-	} else if (msg.msg_namelen) {
-		/*
-		 * sock_recvmsg() returns the datagram length. The iovec
-		 * is consumed while the data is copied, so iov.iov_len
-		 * no longer describes the message at this point.
-		 */
-		recv_ctxt->msg_len = error;
-		status = 0;
-	}
-
-	LOG_TRACE_ARGS ("Received packet from: %d.%d.%d.%d\n",
-			NIPQUAD (sin.sin_addr.s_addr));
-
-	if (status == 0)
-		schedule_work(&recv_ctxt->ipc_wq);
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_recv_udp_msg */
-
-
-/*
- * Map the network-byte-order IP version stored in the comm config
- * (4 or 6) to the matching protocol family. Any other value is a
- * BUG().
- */
-static inline int ocfs2_comm_ip_version_to_family(u16 ip_version)
-{
-	u16 version = ntohs(ip_version);
-
-	if (version == 4)
-		return PF_INET;
-	if (version == 6)
-		return PF_INET6;
-
-	BUG();
-
-	/* not reached; value kept as in the original */
-	return 4;
-}
-
-/*
- * ocfs_send_bcast()
- *
- * Send dlm_msg over UDP to every node whose bit is set in votemap,
- * skipping ourselves (osb->node_num) and nodes without a
- * node_cfg_info entry.
- *
- * Delivery is best effort: a failure to send to one node is logged and
- * the loop moves on to the next node. The function always returns 0.
- */
-static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg)
-{
-	int status = 0, error;
-	__s16 num;
-	BARF_BARF_BARF *node;
-	struct sockaddr_in sin;
-	mm_segment_t oldfs;
-
-	LOG_ENTRY ();
-
-	oldfs = get_fs ();
-	for (num=0; num<osb->max_nodes; num++) {
-		if (num == osb->node_num)
-			continue;
-
-		if (!ocfs_node_map_test_bit(votemap, num))
-			continue;
-
-		node = osb->node_cfg_info[num];
-		if (!node)
-			continue;
-
-		LOG_TRACE_ARGS("Sending msg to node=%u, name=%s\n",
-			       num, node->node_name);
-		memset (&sin, 0, sizeof (sin));
-		sin.sin_family = ocfs2_comm_ip_version_to_family(node->ipc_config.ip_version);
-		sin.sin_addr.s_addr = node->ipc_config.addr_u.ip_addr4;
-		sin.sin_port = node->ipc_config.ip_port;
-
-		/* the address is printed in hex to match the "0x" prefix */
-		LOG_TRACE_ARGS("about to send to 0x%08x:%u\n",
-			       ntohl(node->ipc_config.addr_u.ip_addr4),
-			       ntohs(node->ipc_config.ip_port));
-
-		status = -ENETUNREACH;
-		if (OcfsIpcCtxt.send_sock) {
-			struct iovec iov = {
-				.iov_base = dlm_msg,
-				.iov_len = dlm_msg->msg_len
-			};
-			struct msghdr msg = {
-				.msg_iov = &iov,
-				.msg_iovlen = 1,
-				.msg_control = NULL,
-				.msg_controllen = 0,
-				.msg_name = (struct sockaddr *) &sin,
-				.msg_namelen = sizeof (sin),
-				.msg_flags = 0
-			};
-
-			status = 0;
-			/* dlm_msg is a kernel buffer */
-			set_fs (get_ds ());
-			error = sock_sendmsg (OcfsIpcCtxt.send_sock, &msg, dlm_msg->msg_len);
-			set_fs (oldfs);
-
-			if (error < 0) {
-				LOG_ERROR_ARGS ("unable to sendmsg, error=%d", error);
-				status = -ENETUNREACH;
-			}
-		}
-		if (status < 0)
-			LOG_ERROR_STATUS (status);
-	}
-
-	/* per-node failures were logged above; they are not reported to the caller */
-	status = 0;
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_send_bcast */
-
-/*
- * ocfs_init_udp_sock()
- *
- * Create and bind the two UDP sockets used for netdlm traffic:
- *   - *send_sock is bound to INADDR_ANY, port 0;
- *   - *recv_sock is bound to INADDR_ANY on the configured comm port.
- *
- * Returns 0 on success, or the negative error from sock_create() or
- * bind(). A socket that was already created is not released here when
- * a later step fails.
- */
-int ocfs_init_udp_sock(struct socket **send_sock,
-		       struct socket **recv_sock)
-{
-	ocfs_comm_info *comm = &(OcfsGlobalCtxt.comm_info);
-	struct sockaddr_in addr;
-	int family;
-	int status;
-
-	LOG_ENTRY ();
-
-	family = ocfs2_comm_ip_version_to_family(comm->ip_version);
-
-	/* sending side: any local address, kernel-chosen port */
-	status = sock_create(family, SOCK_DGRAM, IPPROTO_UDP, send_sock);
-	if (status < 0) {
-		LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
-		goto bail;
-	}
-
-	memset(&addr, 0, sizeof (addr));
-	addr.sin_family = family;
-	addr.sin_addr.s_addr = htonl (INADDR_ANY);
-	addr.sin_port = htons(0);
-
-	status = (*send_sock)->ops->bind(*send_sock,
-					 (struct sockaddr *)&addr,
-					 sizeof(addr));
-	if (status < 0) {
-		LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
-		goto bail;
-	}
-
-	/* receiving side: any local address, configured comm port */
-	status = sock_create(family, SOCK_DGRAM, IPPROTO_UDP, recv_sock);
-	if (status < 0) {
-		LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
-		goto bail;
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = family;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = comm->ip_port;
-
-	status = (*recv_sock)->ops->bind(*recv_sock,
-					 (struct sockaddr *)&addr,
-					 sizeof(addr));
-	if (status < 0)
-		LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_init_udp_sock */
-
-
-/*
- * ocfs_send_vote_reply()
- *
- * Build an OCFS_VOTE_REPLY carrying vote_status for the request in
- * dlm_msg and send it back to the requesting node (dlm_msg->src_node).
- * The lock id, sequence number and flags are copied from the request.
- *
- * The reply object sits on osb->vote_obj_queue only for the duration
- * of the send and is released before returning.
- *
- * Returns 0, or -ENOMEM if the reply object cannot be allocated.
- */
-int ocfs_send_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 vote_status)
-{
-	ocfs_dlm_req_master *req_master;
-	ocfs_dlm_reply_master *reply_master;
-	ocfs_dlm_msg *send_dlm_msg;
-	ocfs_vote_obj *obj;
-	int status = 0;
-	__u32 msg_len, obj_len;
-	ocfs_node_map vote_map;
-
-	LOG_ENTRY ();
-
-	ocfs_node_map_init(osb, &vote_map);
-
-	req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf;
-
-	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_reply_master);
-	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_reply_master);
-
-	obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
-	if (obj == NULL) {
-		/* nothing was allocated, so there is no reference to drop */
-		LOG_ERROR_STATUS (status = -ENOMEM);
-		goto finally;
-	}
-	send_dlm_msg = &(obj->m);
-	reply_master = (ocfs_dlm_reply_master *) send_dlm_msg->msg_buf;
-
-	ocfs_init_dlm_msg (osb, send_dlm_msg, msg_len, OCFS_VOTE_REPLY);
-
-	reply_master->h.lock_id = req_master->lock_id;
-	reply_master->status = vote_status;
-	reply_master->h.lock_seq_num = req_master->lock_seq_num;
-	reply_master->h.flags = req_master->flags;
-
-	/* the reply goes to the requester only */
-	ocfs_node_map_set_bit(&vote_map, dlm_msg->src_node);
-	ocfs_node_map_set(&obj->req_vote_map, &vote_map);
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_add_tail(&obj->list, &osb->vote_obj_queue);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-	ocfs_send_bcast (osb, &vote_map, send_dlm_msg);
-	spin_lock (&obj->lock);
-	obj->vote_state = VOTE_OBJ_STATE_SENT;
-	spin_unlock (&obj->lock);
-
-	// silly ;-)
-	spin_lock (&obj->lock);
-	obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
-	spin_unlock (&obj->lock);
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_del(&obj->list);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-	ocfs_put_vote_obj (obj);
-
-finally:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_send_vote_reply */
-
-
-/*
- * ocfs_check_ipc_msg()
- *
- * Sanity-check a received netdlm packet: it must be non-NULL, at least
- * as long as an ocfs_dlm_msg header, and carry OCFS_DLM_MSG_MAGIC.
- *
- * Returns 1 if the packet looks valid, 0 otherwise.
- */
-int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len)
-{
-	ocfs_dlm_msg *hdr = (ocfs_dlm_msg *) msg;
-	int valid = 0;
-
-	LOG_ENTRY ();
-
-	if (hdr == NULL) {
-		LOG_TRACE_STR("Null netdlm message");
-	} else if (msg_len < sizeof(ocfs_dlm_msg)) {
-		LOG_TRACE_STR("Netdlm message too short");
-	} else if (hdr->magic != OCFS_DLM_MSG_MAGIC) {
-		/* only the magic number is verified, there is no checksum */
-		LOG_TRACE_ARGS ("Magic number mismatch in netdlm message: "
-				"0x%08x != 0x%08x\n",
-				hdr->magic, OCFS_DLM_MSG_MAGIC);
-	} else {
-		valid = 1;
-	}
-
-	LOG_EXIT_INT (valid);
-	return valid;
-}				/* ocfs_check_ipc_msg */
-
-
-/*
- * ocfs_lookup_obj_for_proc()
- *
- * Lookup callback that appends a one-line description of obj to the
- * page described by data->u.proc (page, max, len) and advances
- * *data->u.proc.len by the number of characters actually stored.
- *
- * Always returns -ENOENT so that ocfs_lookup_vote_request_obj() keeps
- * walking and every queued object gets printed.
- */
-int ocfs_lookup_obj_for_proc (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
-{
-	int status = -ENOENT;
-	ocfs_dlm_msg *dlm_msg = NULL;
-	ocfs_dlm_msg_hdr *request = NULL;
-	ocfs_dlm_reply_master *reply = NULL;
-	int *len = data->u.proc.len;
-	int max = data->u.proc.max - *len;
-	char *p = data->u.proc.page + *len;
-	int ret = 0;
-	char *reqstr=NULL, *gotstr=NULL;
-
-	/* page already full: writing p[max-1] below would be out of bounds */
-	if (max <= 0)
-		return status;
-
-	/* just run thru everything to populate /proc */
-	/* return -ENOENT to keep going */
-	dlm_msg = &(obj->m);
-
-	switch (dlm_msg->msg_type) {
-	case OCFS_VOTE_REQUEST:
-		request = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
-
-		/*
-		 * On failure the helper may already have freed the
-		 * buffer; forget the pointer so it is not freed again.
-		 */
-		if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0) {
-			reqstr = NULL;
-			break;
-		}
-		if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0) {
-			gotstr = NULL;
-			break;
-		}
-		ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s\n",
-			       obj->pid,
-			       vote_state_str[obj->vote_state],
-			       obj->vote_status,
-			       reqstr,
-			       request->lock_id,
-			       request->lock_seq_num,
-			       request->flags, gotstr);
-		break;
-	case OCFS_VOTE_REPLY:
-		reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
-		if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0) {
-			reqstr = NULL;
-			break;
-		}
-		ret = snprintf(p, max, "REPLY: %d %c %3d %s %21llu %21llu %08x | %3d\n",
-			       obj->pid,
-			       vote_state_str[obj->vote_state],
-			       obj->vote_status,
-			       reqstr,
-			       reply->h.lock_id,
-			       reply->h.lock_seq_num,
-			       reply->h.flags,
-			       reply->status);
-
-		break;
-	case OCFS_INFO_DISMOUNT:
-		ret = snprintf(p, max, "UNMNT: %d\n", obj->pid);
-		break;
-	default:
-		ret = snprintf(p, max, "BAD!!: %d\n", obj->pid);
-		break;
-	}
-
-	/*
-	 * snprintf() may report the length the untruncated text would
-	 * have had; only account for what actually fits in the page.
-	 */
-	if (ret < 0)
-		ret = 0;
-	else if (ret >= max)
-		ret = max - 1;
-	(*len) += ret;
-	p[max-1] = '\0';
-
-	if (reqstr)
-		kfree(reqstr);
-	if (gotstr)
-		kfree(gotstr);
-	return status;
-}
-
-
-/*
- * ocfs_lookup_obj_by_lockid()
- *
- * Lookup callback: matches a vote request object that is not being
- * destroyed and whose lock id equals data->u.s.lock_id.
- *
- * Returns 0 on a match, -ENOENT otherwise.
- */
-int ocfs_lookup_obj_by_lockid (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
-{
-	ocfs_dlm_msg *msg = &(obj->m);
-	ocfs_dlm_msg_hdr *hdr = (ocfs_dlm_msg_hdr *) msg->msg_buf;
-
-	if (msg->msg_type == OCFS_VOTE_REQUEST &&
-	    obj->vote_state != VOTE_OBJ_STATE_DESTROYING &&
-	    hdr->lock_id == data->u.s.lock_id)
-		return 0;
-
-	return -ENOENT;
-}
-
-/*
- * ocfs_lookup_obj_by_seq()
- *
- * Lookup callback: matches the vote object whose sequence number
- * equals data->u.s.seq_num.
- *
- * Returns -ENOENT if the sequence number differs, 0 for a usable
- * match, and -EINVAL if the sequence number matches but the object is
- * not a request, is unsent or being destroyed, or has a different
- * lock id.
- */
-static int ocfs_lookup_obj_by_seq (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
-{
-	ocfs_dlm_msg *msg;
-	ocfs_dlm_msg_hdr *hdr;
-
-	if (obj->seq_num != data->u.s.seq_num)
-		return -ENOENT;
-
-	msg = &(obj->m);
-	hdr = (ocfs_dlm_msg_hdr *) msg->msg_buf;
-
-	/* right seqnum: it must also be a live request for this lock */
-	if (msg->msg_type == OCFS_VOTE_REQUEST &&
-	    obj->vote_state != VOTE_OBJ_STATE_DESTROYING &&
-	    obj->vote_state != VOTE_OBJ_STATE_UNSENT &&
-	    hdr->lock_id == data->u.s.lock_id)
-		return 0;
-
-	LOG_ERROR_ARGS("bad message: vote_state=%d type=%d "
-		       "lockid=%llu expected=%llu\n",
-		       obj->vote_state, msg->msg_type,
-		       hdr->lock_id, data->u.s.lock_id);
-	return -EINVAL;
-}
-
-/*
- * ocfs_lookup_vote_request_obj()
- *
- * Walk osb->vote_obj_queue under vote_obj_queue_lock and call
- * data->func(obj, data) on each entry, with obj->lock held and a
- * temporary reference taken. -ENOENT from the callback means "keep
- * going"; any other value ends the walk. Both spinlocks are held
- * across the callback.
- *
- * Returns the status of the last callback invoked (-ENOENT if the
- * queue is empty or nothing matched). If the callback returned >= 0,
- * the matching object is handed back through *data->ret still holding
- * the reference taken here; when data->ret is NULL that reference is
- * dropped before returning. On a negative status *data->ret (if
- * given) is set to NULL.
- */
-int ocfs_lookup_vote_request_obj (ocfs_super *osb, ocfs_vote_obj_lookup_data *data)
-{
- int status = -ENOENT;
- struct list_head *iter;
- ocfs_vote_obj *obj = NULL;
-
- spin_lock(&osb->vote_obj_queue_lock);
-
- list_for_each (iter, &osb->vote_obj_queue) {
- obj = list_entry (iter, ocfs_vote_obj, list);
- ocfs_get_vote_obj (obj);
- spin_lock(&obj->lock);
- status = data->func(obj, data);
- spin_unlock(&obj->lock);
- /* no match or error: give the temporary reference back */
- if (status < 0) {
- ocfs_put_vote_obj (obj);
- obj = NULL;
- }
- /* match (>= 0) or hard error: stop; only -ENOENT continues */
- if (status != -ENOENT)
- break;
- obj = NULL;
- }
-
- spin_unlock(&osb->vote_obj_queue_lock);
-
- // return the obj, or drop the ref
- if (data->ret)
- *(data->ret) = obj;
- else if (obj)
- ocfs_put_vote_obj (obj);
- return status;
-}
-
-
-/*
- * ocfs_comm_process_vote_reply()
- *
- * Handle an OCFS_VOTE_REPLY message: find the pending request object
- * with the same sequence number and lock id, record the vote from
- * dlm_msg->src_node in it, and wake the waiter once all requested
- * nodes have answered or an error was seen.
- *
- * Runs with osb->comm_lock held for the whole function.
- *
- * Returns 0 on success, the lookup status if no usable request object
- * is found, or -EINVAL if the object is not in the SENT or
- * PARTIAL_REPLY state.
- */
-int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg)
-{
- int status = 0;
- ocfs_dlm_reply_master *reply;
- ocfs_dlm_msg_hdr *reply_msg;
- ocfs_vote_reply_ctxt ctxt;
- ocfs_vote_obj *obj = NULL;
- ocfs_vote_obj_lookup_data data;
-
- LOG_ENTRY ();
-
- down (&(osb->comm_lock));
-
- reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
- reply_msg = &(reply->h);
-
- /* find the original request object for this reply */
- data.u.s.seq_num = reply_msg->lock_seq_num;
- data.u.s.lock_id = reply_msg->lock_id;
- data.func = ocfs_lookup_obj_by_seq;
- data.ret = &obj;
- status = ocfs_lookup_vote_request_obj (osb, &data);
- if (status < 0 || obj==NULL) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- /* from here on we own a reference on obj (dropped after "unlock") */
- spin_lock(&obj->lock);
- if (obj->vote_state != VOTE_OBJ_STATE_SENT &&
- obj->vote_state != VOTE_OBJ_STATE_PARTIAL_REPLY) {
- LOG_ERROR_ARGS("bad vote reply state=%d, node=%u, lockid=%llu, seq=%llu, vote=%d\n",
- obj->vote_state, dlm_msg->src_node,
- reply_msg->lock_id,
- reply_msg->lock_seq_num, reply->status);
- status = -EINVAL;
- goto unlock;
- }
-
- LOG_TRACE_ARGS("node=%u, lockid=%llu, seq=%llu, vote=%d\n",
- dlm_msg->src_node, reply_msg->lock_id,
- reply_msg->lock_seq_num, reply->status);
-
- /* let the reply update the object's got_vote_map and vote_status */
- ctxt.got_vote_map = &(obj->got_vote_map);
- ctxt.status = &(obj->vote_status);
- ctxt.flags = reply_msg->flags;
- ctxt.reply = reply;
-
- ocfs_process_one_vote_reply(osb, &ctxt, dlm_msg->src_node);
-
- /* every requested node has answered once the two maps are equal */
- if (ocfs_node_map_is_equal(&obj->got_vote_map, &obj->req_vote_map))
- obj->vote_state = VOTE_OBJ_STATE_FULL_REPLY;
- else
- obj->vote_state = VOTE_OBJ_STATE_PARTIAL_REPLY;
-
-unlock:
- // wake if complete or error
- if (obj->vote_status < 0 || status < 0 ||
- obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
- atomic_set (&obj->voted_event_woken, 1);
- wake_up (&obj->voted_event);
- }
- spin_unlock(&obj->lock);
- ocfs_put_vote_obj (obj);
-
-bail:
- up (&(osb->comm_lock));
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_comm_process_vote_reply */
-
-/*
- * ocfs_dlm_recv_msg()
- *
- * Work-queue handler for a received packet. val is the ocfs_recv_ctxt
- * that was queued by ocfs_recv_udp_msg(). The packet is validated and
- * dispatched when the context status is non-negative, and the context
- * is freed in every case.
- */
-void ocfs_dlm_recv_msg (void *val)
-{
-	ocfs_recv_ctxt *ctxt = (ocfs_recv_ctxt *) val;
-	__u8 *packet = (__u8 *) ctxt->msg;
-
-	LOG_ENTRY ();
-
-	if (ctxt->status >= 0 &&
-	    ocfs_check_ipc_msg (packet, ctxt->msg_len))
-		ocfs_comm_process_msg (packet);
-
-	kfree(ctxt);
-
-	LOG_EXIT ();
-}				/* ocfs_dlm_recv_msg */
-
-/*
- * ocfs_comm_process_msg()
- *
- * Dispatch a validated netdlm packet. The target volume is found by
- * comparing the packet's vol_id with the uuid of every mounted osb.
- * Packets for unknown volumes, with an out-of-range source node, or
- * from a node that is not set in osb->publ_map are ignored.
- *
- * Returns the status of ocfs_process_vote() for vote requests and 0
- * in every other case, including ignored packets.
- */
-int ocfs_comm_process_msg (__u8 * msg)
-{
-	int status = 0;
-	ocfs_super *osb = NULL;
-	ocfs_dlm_msg *dlm_msg;
-	struct list_head *iter_osb, *temp_iter;
-	__s16 src_node;
-	const char *src_name;
-
-	LOG_ENTRY ();
-
-	dlm_msg = (ocfs_dlm_msg *) msg;
-
-	down (&(OcfsGlobalCtxt.global_res));
-	list_for_each_safe (iter_osb, temp_iter, &(OcfsGlobalCtxt.osb_next)) {
-		osb = list_entry (iter_osb, ocfs_super, osb_next);
-		if (!memcmp (osb->uuid, dlm_msg->vol_id,
-			     MAX_VOL_ID_LENGTH))
-			break;
-		osb = NULL;
-	}
-	up (&(OcfsGlobalCtxt.global_res));
-
-	if (osb == NULL) {
-		LOG_ERROR_STR("Ignoring netdlm message with invalid volume id");
-		goto bail;
-	}
-
-	if (dlm_msg->src_node >= osb->max_nodes) {
-		LOG_ERROR_ARGS ("Invalid source node in netdlm message: %d\n",
-				dlm_msg->src_node);
-		goto bail;
-	}
-
-	if (!ocfs_node_map_test_bit(&osb->publ_map, dlm_msg->src_node)) {
-		LOG_TRACE_STR("Ignoring netdlm message from dead node");
-		goto bail;
-	}
-
-	switch (dlm_msg->msg_type) {
-	case OCFS_VOTE_REQUEST:
-		status = ocfs_process_vote (osb, dlm_msg);
-		break;
-
-	case OCFS_VOTE_REPLY:
-		ocfs_comm_process_vote_reply (osb, dlm_msg);
-		break;
-
-	case OCFS_INFO_DISMOUNT:
-		src_node = dlm_msg->src_node;
-		/*
-		 * node_cfg_info slots can be empty (ocfs_send_bcast()
-		 * skips NULL entries), and src_node comes off the wire,
-		 * so do not dereference the slot blindly.
-		 */
-		src_name = osb->node_cfg_info[src_node] ?
-			osb->node_cfg_info[src_node]->node_name : "(unknown)";
-		printk ("ocfs2: Received dismount message for device (%u,%u) "
-			"from %s (node %d)\n", MAJOR(osb->sb->s_dev),
-			MINOR(osb->sb->s_dev), src_name,
-			src_node);
-		atomic_set (&(osb->vol_node_map[src_node].dismount), 1);
-		break;
-
-	default:
-		break;
-	}
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_comm_process_msg */
-
-
-
-/*
- * ocfs_send_dismount_msg()
- *
- * Tell every other node that is set in osb->publ_map that this node
- * is dismounting the volume, by broadcasting an OCFS_INFO_DISMOUNT
- * message. No reply is awaited.
- *
- * Returns 0, or -ENOMEM if the message object cannot be allocated.
- */
-int ocfs_send_dismount_msg (ocfs_super * osb)
-{
-	int status = 0;
-	ocfs_dlm_msg *dlm_msg = NULL;
-	ocfs_dlm_msg_hdr *req;
-	ocfs_vote_obj *obj;
-	__u32 msg_len, obj_len;
-	ocfs_node_map map;
-
-	LOG_ENTRY_ARGS ("(osb=0x%p)\n", osb);
-
-	/* everyone in the publish map except ourselves */
-	ocfs_node_map_dup(osb, &map, &osb->publ_map);
-	ocfs_node_map_clear_bit(&map, osb->node_num);
-
-	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
-	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
-
-	obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
-	if (obj == NULL) {
-		/* nothing was allocated, so there is no reference to drop */
-		LOG_ERROR_STATUS (status = -ENOMEM);
-		goto finally;
-	}
-	dlm_msg = &(obj->m);
-	req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
-	ocfs_init_dlm_msg (osb, dlm_msg, msg_len, OCFS_INFO_DISMOUNT);
-	req->lock_id = 0;
-	req->flags = 0;
-	req->lock_seq_num = 0;
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_add_tail(&obj->list, &osb->vote_obj_queue);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-	ocfs_send_bcast (osb, &map, dlm_msg);
-	spin_lock (&obj->lock);
-	obj->vote_state = VOTE_OBJ_STATE_SENT;
-	spin_unlock (&obj->lock);
-
-	// silly ;-)
-	spin_lock (&obj->lock);
-	obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
-	spin_unlock (&obj->lock);
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_del(&obj->list);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-	ocfs_put_vote_obj (obj);
-
-finally:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_send_dismount_msg */
-
-/*
- * ocfs_init_dlm_msg()
- *
- * Fill in the common netdlm header: magic, total message length,
- * message type, our node number as the source, and the volume uuid.
- */
-static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type)
-{
-	LOG_ENTRY ();
-
-	/* identify the sender and the volume */
-	memcpy (dlm_msg->vol_id, osb->uuid, MAX_VOL_ID_LENGTH);
-	dlm_msg->src_node = osb->node_num;
-
-	/* describe the message itself */
-	dlm_msg->magic = OCFS_DLM_MSG_MAGIC;
-	dlm_msg->msg_type = type;
-	dlm_msg->msg_len = msg_len;
-
-	LOG_EXIT ();
-}				/* ocfs_init_dlm_msg */
-
-
-/*
- * ocfs_alloc_vote_obj()
- *
- * Allocate and initialise a zeroed vote object of "bytes" bytes (the
- * object itself plus its trailing message payload). The object starts
- * UNSENT with a refcount of 1, an empty got_vote_map and a
- * req_vote_map copied from votemap (or empty when votemap is NULL).
- *
- * Returns the new object, or NULL if the allocation fails.
- */
-static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map *votemap)
-{
-	ocfs_vote_obj *vobj = ocfs_malloc (bytes);
-
-	if (!vobj)
-		return NULL;
-
-	memset(vobj, 0, bytes);
-
-	/* bookkeeping */
-	vobj->pid = current->pid;
-	vobj->seq_num = 0ULL;
-	vobj->req_lock_type = reqlock;
-	vobj->vote_status = 0;
-	vobj->vote_state = VOTE_OBJ_STATE_UNSENT;
-
-	/* synchronisation primitives */
-	atomic_set(&vobj->refcount, 1);
-	spin_lock_init (&vobj->lock);
-	INIT_LIST_HEAD (&vobj->list);
-	init_waitqueue_head (&vobj->voted_event);
-	atomic_set(&vobj->voted_event_woken, 0);
-
-	/* requested vs. received votes */
-	if (votemap)
-		ocfs_node_map_dup(osb, &vobj->req_vote_map, votemap);
-	else
-		ocfs_node_map_init(osb, &vobj->req_vote_map);
-	ocfs_node_map_init(osb, &vobj->got_vote_map);
-
-	return vobj;
-}
-
-#define OCFS_DLM_NET_TIMEOUT (30000) // 30 seconds
-
-/*
- * ocfs_send_dlm_request_msg()
- * inode is definitely non NULL
- *
- * Broadcast an OCFS_VOTE_REQUEST for lock_id to the nodes in votemap
- * and wait (up to OCFS_DLM_NET_TIMEOUT) for their replies.
- *
- * On return *vote_status holds the combined vote result recorded in
- * the request object, and every node that answered has been cleared
- * from votemap. *vote_status is left untouched if the request object
- * cannot be allocated.
- *
- * Returns the status of the wait, or -ENOMEM on allocation failure.
- */
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, struct inode *inode, __u32 num_ident, int *vote_status)
-{
-	int status = 0;
-	ocfs_dlm_msg *dlm_msg = NULL;
-	ocfs_dlm_msg_hdr *req;
-	ocfs_vote_obj *obj;
-	__u32 msg_len, obj_len;
-
-	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
-			osb, lock_id, lock_type, flags);
-
-	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
-	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
-
-	obj = ocfs_alloc_vote_obj (osb, obj_len, lock_type, votemap);
-	if (obj == NULL) {
-		/* nothing was allocated, so there is no reference to drop */
-		LOG_ERROR_STATUS (status = -ENOMEM);
-		goto finally;
-	}
-	dlm_msg = &(obj->m);
-	req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
-	ocfs_init_dlm_msg (osb, dlm_msg, msg_len, OCFS_VOTE_REQUEST);
-
-	/* sequence numbers are global; replies are matched on them */
-	spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
-	req->lock_seq_num = ++OcfsGlobalCtxt.comm_seq_num;
-	obj->seq_num = req->lock_seq_num;
-	spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
-
-	req->lock_id = lock_id;
-	req->flags = flags;
-	req->num_ident = num_ident;
-
-#ifdef VERBOSE_LOCKING_TRACE
-	printk("ocfs_send_dlm_request_msg: inode=%p, lockid = %llu\n",
-	       inode, lock_id);
-#endif
-
-	/*
-	 * Mark the request SENT before it can be seen or answered:
-	 * ocfs_lookup_obj_by_seq() rejects replies for UNSENT objects,
-	 * so a reply arriving right after the broadcast would otherwise
-	 * be dropped.
-	 */
-	spin_lock (&obj->lock);
-	obj->vote_state = VOTE_OBJ_STATE_SENT;
-	spin_unlock (&obj->lock);
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_add_tail(&obj->list, &osb->vote_obj_queue);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-	ocfs_send_bcast (osb, votemap, dlm_msg);
-	status = ocfs_wait_uninterruptible(obj->voted_event,
-					   atomic_read (&obj->voted_event_woken),
-					   OCFS_DLM_NET_TIMEOUT);
-
-	spin_lock (&obj->lock);
-	if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
-		LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
-	} else if ((obj->vote_status != -EAGAIN && obj->vote_status != -EBUSY) || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
-#warning "should we even be erroring here at all!"
-		LOG_ERROR_ARGS("inode %llu, vote_status=%d, vote_state=%d, "
-			       "lockid=%llu, flags = 0x%x, asked type = %u "
-			       "master = %d, state = 0x%lx, type = %u\n",
-			       OCFS_I(inode)->ip_blkno, obj->vote_status,
-			       obj->vote_state, lock_id, flags, lock_type,
-			       GET_INODE_LOCKRES(inode)->master_node_num,
-			       GET_INODE_LOCKRES(inode)->readonly_state,
-			       GET_INODE_LOCKRES(inode)->lock_type);
-	}
-	*vote_status = obj->vote_status;
-	obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
-	/* whoever answered no longer needs to be asked */
-	ocfs_node_map_clear_bits(votemap, &obj->got_vote_map);
-	spin_unlock (&obj->lock);
-
-	spin_lock(&osb->vote_obj_queue_lock);
-	list_del(&obj->list);
-	spin_unlock(&osb->vote_obj_queue_lock);
-
-
-	ocfs_compute_dlm_stats (status, *vote_status,
-				&(OcfsGlobalCtxt.net_reqst_stats));
-
-	ocfs_compute_dlm_stats (status, *vote_status,
-				&(osb->net_reqst_stats));
-
-	ocfs_put_vote_obj (obj);
-
-finally:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_send_dlm_request_msg */
-
-
-/*
- * ocfs_process_one_vote_reply()
- *
- * Translate one node's reply status into an errno-style result stored
- * in *ctxt->status, and record in ctxt->got_vote_map that node_num has
- * voted. FLAG_VOTE_FILE_DEL and unrecognised reply values do not set
- * the node's bit.
- */
-void ocfs_process_one_vote_reply(ocfs_super *osb, ocfs_vote_reply_ctxt *ctxt, __u32 node_num)
-{
-	int reply_status = ctxt->reply->status;
-	int result = 0;
-	int mark_voted = 1;
-
-	switch (reply_status) {
-	case FLAG_VOTE_NODE:
-	case FLAG_VOTE_OIN_UPDATED:
-		/* plain "yes" */
-		break;
-	case FLAG_VOTE_OIN_ALREADY_INUSE:
-		result = (ctxt->flags & FLAG_FILE_DELETE) ? -EBUSY : -EINVAL;
-		break;
-	case FLAG_VOTE_UPDATE_RETRY:
-		result = -EAGAIN;
-		break;
-	case FLAG_VOTE_FILE_DEL:
-#warning "don't we need to set the node map bit here?"
-		mark_voted = 0;
-		result = -ENOENT;
-		break;
-	default:
-		mark_voted = 0;
-		break;
-	}
-
-	if (mark_voted)
-		ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
-	*(ctxt->status) = result;
-}
-
-/*
- * Reset a node map: it covers osb->max_nodes nodes and has no bits
- * set. The whole OCFS_NODE_MAP_MAX_NODES-bit backing array is cleared.
- *
- * special case -1 for now
- * TODO: should *really* make sure the calling func never passes -1!!
- * (applies to the set/clear bit helpers below)
- */
-void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map)
-{
-	memset(map->map, 0,
-	       BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) * sizeof(unsigned long));
-	map->num_nodes = osb->max_nodes;
-}
-
-/* Set node "bit" in the map; a node number of -1 is silently ignored. */
-void ocfs_node_map_set_bit(ocfs_node_map *map, int bit)
-{
-	if (bit != -1) {
-		OCFS_ASSERT(bit < map->num_nodes);
-		set_bit(bit, map->map);
-	}
-}
-
-/* Clear node "bit" in the map; a node number of -1 is silently ignored. */
-void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit)
-{
-	if (bit != -1) {
-		OCFS_ASSERT(bit < map->num_nodes);
-		clear_bit(bit, map->map);
-	}
-}
-
-// target &= ~mask: clear every bit of "target" that is set in "mask"
-void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask)
-{
-	int node;
-
-	for (node = find_next_bit (mask->map, mask->num_nodes, 0);
-	     node < mask->num_nodes;
-	     node = find_next_bit (mask->map, mask->num_nodes, node + 1))
-		ocfs_node_map_clear_bit(target, node);
-}
-
-// target |= mask: set every bit of "target" that is set in "mask"
-void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask)
-{
-	int node;
-
-	for (node = find_next_bit (mask->map, mask->num_nodes, 0);
-	     node < mask->num_nodes;
-	     node = find_next_bit (mask->map, mask->num_nodes, node + 1))
-		ocfs_node_map_set_bit(target, node);
-}
-
-/*
- * Return non-zero if node "bit" is set in the map. Asking for a node
- * at or beyond map->num_nodes is logged and treated as a BUG().
- */
-int ocfs_node_map_test_bit(ocfs_node_map *map, int bit)
-{
-	if (!(bit < map->num_nodes)) {
-		LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
-		BUG();
-	}
-
-	return test_bit(bit, map->map);
-}
-
-/*
- * ocfs_node_map_stringify()
- *
- * Render the set bits of a node map as a string of "%3d " entries
- * ("  0   3  17 "). On success *str points to a freshly allocated,
- * NUL-terminated buffer that the caller must kfree().
- *
- * Returns 0 on success or -ENOMEM; on failure *str is NULL, so the
- * caller has nothing to free.
- */
-static int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
-{
-	const int entry_len = strlen("123 ");
-	int buf_len;
-	int i, n;
-	char *s;
-
-	OCFS_ASSERT(map->num_nodes > 0);
-
-	/* one entry per possible node, plus the terminating NUL */
-	buf_len = entry_len * map->num_nodes + 1;
-
-	/*
-	 * GFP_ATOMIC: the only caller in this file,
-	 * ocfs_lookup_obj_for_proc(), is a lookup callback that
-	 * ocfs_lookup_vote_request_obj() invokes with spinlocks held,
-	 * so the allocation must not sleep.
-	 */
-	*str = kmalloc(buf_len, GFP_ATOMIC);
-	if (!(*str))
-		return -ENOMEM;
-
-	memset(*str, 0, buf_len);
-
-	s = *str;
-	for (i=0; i<map->num_nodes; i++) {
-		if (ocfs_node_map_test_bit(map, i)) {
-			n = sprintf(s, "%3d ", i);
-			/* a wider node number would overrun the buffer */
-			if (n != entry_len) {
-				kfree(*str);
-				*str = NULL;
-				return -ENOMEM;
-			}
-			s += n;
-		}
-	}
-	return 0;
-}
-
-/* Return 1 if no node bit is set in the map, 0 otherwise. */
-int ocfs_node_map_is_empty(ocfs_node_map *map)
-{
-	int first_set;
-
-	OCFS_ASSERT(map->num_nodes > 0);
-
-	first_set = find_next_bit(map->map, map->num_nodes, 0);
-	return (first_set < map->num_nodes) ? 0 : 1;
-}
-
-/*
- * Return 1 if both maps, which must cover the same number of nodes,
- * have identical contents, 0 otherwise. The comparison is done one
- * long word at a time.
- */
-int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
-{
-	int words, idx;
-
-	OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
-	OCFS_ASSERT(map1->num_nodes > 0);
-
-	words = BITS_TO_LONGS(map1->num_nodes);
-
-	/* stop at the first word that differs */
-	for (idx = 0; idx < words && map1->map[idx] == map2->map[idx]; idx++)
-		;
-
-	return idx >= words;
-}
-
-
-/* target &= mask; both maps must cover the same number of nodes. */
-void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
-{
-	int words, idx;
-
-	OCFS_ASSERT(target->num_nodes == mask->num_nodes);
-	OCFS_ASSERT(target->num_nodes > 0);
-
-	words = BITS_TO_LONGS(target->num_nodes);
-	idx = 0;
-	while (idx < words) {
-		target->map[idx] = target->map[idx] & mask->map[idx];
-		idx++;
-	}
-}
-
-/* Copy the contents of "from" into "target"; both maps must cover the same number of nodes. */
-void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from)
-{
-	int words, idx;
-
-	OCFS_ASSERT(target->num_nodes == from->num_nodes);
-	OCFS_ASSERT(target->num_nodes > 0);
-
-	words = BITS_TO_LONGS(target->num_nodes);
-	idx = 0;
-	while (idx < words) {
-		target->map[idx] = from->map[idx];
-		idx++;
-	}
-}
-
-
-/*
- * Initialise "target" for this osb and then copy the contents of
- * "from" into it.
- */
-void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from)
-{
-	OCFS_ASSERT(from->num_nodes > 0);
-
-	/* size and clear the destination first, then copy the bits over */
-	ocfs_node_map_init(osb, target);
-	ocfs_node_map_set(target, from);
-}
-
-/*
- * Returns 1 if no bit other than "bit" is set in target, 0 otherwise.
- * The check works on a private copy, so target is not modified.
- */
-int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit)
-{
-	ocfs_node_map scratch;
-
-	ocfs_node_map_dup(osb, &scratch, target);
-	ocfs_node_map_clear_bit(&scratch, bit);
-
-	return ocfs_node_map_is_empty(&scratch);
-}
-
-/*
- * Mark node "num" in the publish map. Thin wrapper around
- * ocfs_node_map_set_bit().
- */
-void ocfs_publish_map_set(ocfs_node_map *pubmap, int num)
-{
-	ocfs_node_map_set_bit(pubmap, num);
-}
-
-/*
- * Clear node "num" in the publish map. Thin wrapper around
- * ocfs_node_map_clear_bit().
- */
-void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num)
-{
-	ocfs_node_map_clear_bit(pubmap, num);
-}
-
-/*
- * Set node "num" in osb->recovery_map, under
- * osb->recovery_map_lock.
- */
-void ocfs_recovery_map_set(ocfs_super *osb, int num)
-{
-	ocfs_node_map *rmap = &osb->recovery_map;
-
-	spin_lock(&osb->recovery_map_lock);
-	ocfs_node_map_set_bit(rmap, num);
-	spin_unlock(&osb->recovery_map_lock);
-}
-
-/*
- * Clear node "num" in osb->recovery_map, under
- * osb->recovery_map_lock.
- */
-void ocfs_recovery_map_clear(ocfs_super *osb, int num)
-{
-	ocfs_node_map *rmap = &osb->recovery_map;
-
-	spin_lock(&osb->recovery_map_lock);
-	ocfs_node_map_clear_bit(rmap, num);
-	spin_unlock(&osb->recovery_map_lock);
-}
-
-/*
- * Return non-zero if node "num" is set in osb->recovery_map. A node
- * number of -1 is never recovering. The map is read without taking
- * recovery_map_lock.
- */
-int ocfs_node_is_recovering(ocfs_super *osb, int num)
-{
-	return (num == -1) ? 0 : ocfs_node_map_test_bit(&osb->recovery_map, num);
-}
-
-/*
- * Return non-zero if node "index" is set in the publish map. A node
- * number of -1 is never alive.
- */
-int ocfs_node_is_alive(ocfs_node_map *pubmap, int index)
-{
-	return (index == -1) ? 0 : ocfs_node_map_test_bit(pubmap, index);
-}
-
Deleted: branches/dlm-glue/src/vote.h
===================================================================
--- branches/dlm-glue/src/vote.h 2004-10-26 01:06:23 UTC (rev 1593)
+++ branches/dlm-glue/src/vote.h 2004-10-26 01:08:32 UTC (rev 1594)
@@ -1,75 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * vote.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_VOTE_H /* include guard */
-#define OCFS2_VOTE_H
- /* prototypes for the UDP socket, vote-object lookup and message send/receive routines */
-int ocfs_init_udp_sock(struct socket **send_sock,
- struct socket **recv_sock);
-int ocfs_lookup_obj_for_proc(ocfs_vote_obj *obj,
- ocfs_vote_obj_lookup_data *data);
-int ocfs_lookup_obj_by_lockid(ocfs_vote_obj *obj,
- ocfs_vote_obj_lookup_data *data);
-int ocfs_lookup_vote_request_obj(ocfs_super *osb,
- ocfs_vote_obj_lookup_data *data);
-void ocfs_process_one_vote_reply(ocfs_super *osb,
- ocfs_vote_reply_ctxt *ctxt,
- __u32 node_num);
-int ocfs_recv_udp_msg(ocfs_recv_ctxt *recv_ctxt);
-int ocfs_send_dismount_msg(ocfs_super *osb);
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id,
- __u32 lock_type, __u32 flags,
- ocfs_node_map *votemap,
- struct inode *inode, __u32 num_ident,
- int *vote_status);
-int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
- __u32 vote_status);
-int ocfs_lookup_vote_request_obj (ocfs_super *osb, /* NOTE(review): repeats the declaration made earlier in this header */
- ocfs_vote_obj_lookup_data *data);
- /* node map operations on ocfs_node_map */
-void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
-void ocfs_node_map_set_bit(ocfs_node_map *map, int bit);
-void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit);
-/* clear all the bits in "target" which are set in "mask" */
-void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask);
-/* set all the bits in "target" which are set in "mask" */
-void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask);
-int ocfs_node_map_test_bit(ocfs_node_map *map, int bit);
-int ocfs_node_map_is_empty(ocfs_node_map *map);
-int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2);
-void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask);
-void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
-void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from);
-/* returns 1 if bit is the only bit set in target, 0 otherwise */
-int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit);
- /* per-node queries and updates on a caller-supplied map or on osb's recovery map */
-int ocfs_node_is_recovering(ocfs_super *osb, int num);
-int ocfs_node_is_alive(ocfs_node_map *pubmap, int index);
-void ocfs_publish_map_set(ocfs_node_map *pubmap, int num);
-void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num);
-void ocfs_recovery_map_set(ocfs_super *osb, int num);
-void ocfs_recovery_map_clear(ocfs_super *osb, int num);
-
-#endif /* OCFS2_VOTE_H */
More information about the Ocfs2-commits
mailing list