[Ocfs2-commits] mfasheh commits r1665 - branches/dlm-glue/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Mon Nov 22 22:01:39 CST 2004


Author: mfasheh
Date: 2004-11-22 22:01:37 -0600 (Mon, 22 Nov 2004)
New Revision: 1665

Added:
   branches/dlm-glue/src/vote.c
   branches/dlm-glue/src/vote.h
Modified:
   branches/dlm-glue/src/Makefile
   branches/dlm-glue/src/dlmglue.c
   branches/dlm-glue/src/dlmglue.h
   branches/dlm-glue/src/inode.c
   branches/dlm-glue/src/journal.c
   branches/dlm-glue/src/namei.c
   branches/dlm-glue/src/ocfs_log.h
   branches/dlm-glue/src/super.c
Log:
* break out the network vote stuff into its own file. This cleans
  things up considerably.



Modified: branches/dlm-glue/src/Makefile
===================================================================
--- branches/dlm-glue/src/Makefile	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/Makefile	2004-11-23 04:01:37 UTC (rev 1665)
@@ -86,7 +86,7 @@
 	sysfile.c		\
 	util.c			\
 	ver.c			\
-
+	vote.c
 HFILES = \
 	ocfs2_fs.h		\
 	ocfs1_fs_compat.h	\
@@ -115,7 +115,7 @@
 	sysfile.h		\
 	util.h			\
 	ver.h			\
-
+	vote.h
 VERSION_FILES = $(CFILES) $(HFILES)
 VERSION_SRC = ver.c
 VERSION_PREFIX = OCFS

Modified: branches/dlm-glue/src/dlmglue.c
===================================================================
--- branches/dlm-glue/src/dlmglue.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/dlmglue.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -49,6 +49,7 @@
 #include "inode.h"
 #include "slot_map.h"
 #include "util.h"
+#include "vote.h"
 
 #include "ocfs_journal.h"
 #include "buffer_head_io.h"
@@ -149,10 +150,6 @@
 				       int new_level);
 static int ocfs2_meta_lock_update(struct inode *inode,
 				  struct buffer_head **bh);
-static int ocfs2_vote_thread(void *arg);
-static void ocfs2_vote_thread_do_work(ocfs_super *osb);
-static void ocfs2_process_vote(ocfs_super *osb,
-			       ocfs2_vote_msg *msg);
 static int __ocfs2_drop_lock(ocfs_super *osb,
 			     ocfs2_lock_res *lockres);
 static void ocfs2_drop_super_lock(ocfs_super *osb);
@@ -163,7 +160,6 @@
 				    int lvb);
 static int __ocfs2_cancel_convert(ocfs_super *osb,
 				  ocfs2_lock_res *lockres);
-static void ocfs2_process_blocked_inode(struct inode *inode);
 static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
 						  ocfs2_lock_res *lockres,
 						  int new_level);
@@ -171,10 +167,6 @@
 				      int *requeue);
 static int ocfs2_process_blocked_data(struct inode *inode,
 				      int *requeue);
-static int ocfs2_do_request_vote(ocfs_super *osb,
-				 u64 blkno,
-				 unsigned int generation,
-				 enum ocfs2_vote_request type);
 
 static inline int ocfs2_lvb_is_trustable(ocfs2_lock_res *lockres)
 {
@@ -1645,7 +1637,7 @@
 /* TODO: This is very generic, and looks much like
  * ocfs2_process_blocked_data. Lets try to find a way to combine these
  * two. */
-static void ocfs2_process_blocked_super(ocfs_super *osb)
+void ocfs2_process_blocked_super(ocfs_super *osb)
 {
 	int status = 0;
 	int new_level;
@@ -1705,7 +1697,7 @@
 	ocfs2_schedule_blocked_super(osb);
 }
 
-static void ocfs2_process_blocked_inode(struct inode *inode)
+void ocfs2_process_blocked_inode(struct inode *inode)
 {
 	int status;
 	int requeue = 0;
@@ -1747,762 +1739,3 @@
 	osb->blocked_inode_count++;
 	spin_unlock(&osb->vote_task_lock);
 }
-
-static void ocfs2_process_mount_request(ocfs_super *osb,
-					unsigned int node_num)
-{
-	printk("MOUNT vote from node %u\n", node_num);
-	/* The other node only sends us this message when he has an EX
-	 * on the superblock, so our recovery threads (if having been
-	 * launched) are waiting on it.*/
-	ocfs_recovery_map_clear(osb, node_num);
-	ocfs_node_map_set_bit(osb, &osb->mounted_map, node_num);
-}
-
-static void ocfs2_process_umount_request(ocfs_super *osb,
-					 unsigned int node_num)
-{
-	printk("UMOUNT vote from node %u\n", node_num);
-	ocfs_node_map_clear_bit(osb, &osb->mounted_map, node_num);
-	ocfs_node_map_set_bit(osb, &osb->umount_map, node_num);
-}
-
-static int ocfs2_process_delete_request(struct inode *inode)
-{
-	int response = -EBUSY;
-
-	LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
-		       "lnk_cnt = %u\n", inode->i_ino, 
-		       inode->i_nlink);
-
-	/* force this as ours may be out of date. */
-	inode->i_nlink = 0;
-
-	spin_lock(&OCFS_I(inode)->ip_lock);
-	/* vote no if the file is still open. */
-	if (OCFS_I(inode)->ip_open_cnt > 0) {
-		LOG_TRACE_PROCESS_VOTE("open count = %u\n", 
-		       OCFS_I(inode)->ip_open_cnt);
-		spin_unlock(&OCFS_I(inode)->ip_lock);
-		goto done;
-	}
-	spin_unlock(&OCFS_I(inode)->ip_lock);
-
-	/* directories are a bit ugly... What if someone is sitting in
-	 * it? We want to make sure the inode is removed completely as
-	 * a result of the iput in process_vote. */
-	if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
-		LOG_TRACE_PROCESS_VOTE("i_count = %u\n", 
-		       atomic_read(&inode->i_count));
-		goto done;
-	}
-
-	/* If we get here, then we're voting 'yes', so commit the
-	 * delete on our side. */
-	response = 0;
-
-	spin_lock(&OCFS_I(inode)->ip_lock);
-	SET_INODE_DELETED(inode);
-	/* We set the SKIP_DELETE flag on the inode so we don't try to
-	 * delete it in delete_inode ourselves. */
-	OCFS_I(inode)->ip_flags |=  OCFS_INODE_SKIP_DELETE;
-	spin_unlock(&OCFS_I(inode)->ip_lock);
-
-	d_prune_aliases (inode);
-
-	/* TODO: How much of this is really necessary? */
-	sync_mapping_buffers(inode->i_mapping);
-	ocfs_truncate_inode_pages(inode, 0);
-	ocfs2_extent_map_trunc(inode, 0);
-
-done:
-	return response;
-}
-
-static void ocfs2_process_dentry_request(struct inode *inode,
-					 int rename)
-{
-	d_prune_aliases (inode);
-
-	/* for rename, we don't drop link counts */
-	if (!rename) {
-		if (S_ISDIR(inode->i_mode))
-			inode->i_nlink = 0;
-		else
-			inode->i_nlink--;
-	}
-}
-
-static void ocfs2_process_vote(ocfs_super *osb,
-			       ocfs2_vote_msg *msg)
-{
-	int net_status, vote_response;
-	int rename = 0;
-	unsigned int node_num, generation;
-	u64 blkno;
-	enum ocfs2_vote_request request;
-	struct inode *inode = NULL;
-	struct inode *remote_node;
-	ocfs2_msg_hdr *hdr = &msg->v_hdr;
-	ocfs2_response_msg response;
-
-	/* decode the network mumbo jumbo into local variables. */
-	request = ntohl(hdr->h_request);
-	blkno = be64_to_cpu(hdr->h_blkno);
-	generation = ntohl(hdr->h_generation);
-	node_num = ntohl(hdr->h_node_num);
-
-	printk("ocfs2: processing vote: request = %u, blkno = %llu, "
-	       "generation = %u, node_num = %u\n", request, blkno, generation,
-	       node_num);
-
-	vote_response = 0;
-	switch (request) {
-	case OCFS2_VOTE_REQ_UMOUNT:
-		ocfs2_process_umount_request(osb, node_num);
-		goto respond;
-	case OCFS2_VOTE_REQ_MOUNT:
-		ocfs2_process_mount_request(osb, node_num);
-		goto respond;
-	default:
-		/* avoids a gcc warning */
-		break;
-	}
-
-	/* We cannot process the remaining message types before we're
-	 * fully mounted. It's perfectly safe however to send a 'yes'
-	 * response as we can't possibly have any of the state they're
-	 * asking us to modify yet. */
-	if (atomic_read(&osb->vol_state) == VOLUME_INIT)
-		goto respond;
-
-	vote_response = -EINVAL;
-	/* If we get here, then the request is against an inode. */
-	inode = ocfs_ilookup(osb, blkno);
-	if (!inode)
-		goto respond;
-
-	OCFS_ASSERT(inode->i_generation == generation);
-
-	switch (request) {
-	case OCFS2_VOTE_REQ_DELETE:
-		vote_response = ocfs2_process_delete_request(inode);
-		break;
-	case OCFS2_VOTE_REQ_RENAME:
-		rename = 1;
-		/* fall through */
-	case OCFS2_VOTE_REQ_UNLINK:
-		ocfs2_process_dentry_request(inode, rename);
-		break;
-	default:
-		printk("ocfs2_process_vote: node %u, invalid request: %u\n",
-		       node_num, request);
-	}
-
-respond:
-	/* Response struture is small so we just put it on the stack
-	 * and stuff it inline. */
-	memset(&response, 0, sizeof(ocfs2_response_msg));
-	response.r_hdr.h_response_id = hdr->h_response_id;
-	response.r_hdr.h_blkno = hdr->h_blkno;
-	response.r_hdr.h_generation = hdr->h_generation;
-	response.r_hdr.h_node_num = htonl(osb->node_num);
-	response.r_response = htonl(vote_response);
-
-	remote_node = nm_get_node_by_num(node_num);
-	if (!remote_node) {
-		LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
-	} else {
-		net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
-					      osb->net_key,
-					      &response,
-					      sizeof(ocfs2_response_msg),
-					      remote_node,
-					      NULL);
-		if (net_status < 0)
-			LOG_ERROR_ARGS("message to node %u fails with error "
-				       "%d!\n", node_num, net_status);
-		iput(remote_node);
-	}
-
-	if (inode)
-		iput(inode);
-}
-
-static void ocfs2_vote_thread_do_work(ocfs_super *osb)
-{
-	struct inode *inode = NULL;
-	ocfs_inode_private *ip;
-	int processed;
-	ocfs2_vote_work *work;
-
-	spin_lock(&osb->vote_task_lock);
-	if (osb->blocked_super_count) {
-		OCFS_ASSERT(osb->blocked_super_count == 1);
-		osb->blocked_super_count = 0;
-		spin_unlock(&osb->vote_task_lock);
-
-		ocfs2_process_blocked_super(osb);
-
-		spin_lock(&osb->vote_task_lock);
-	}
-
-	processed = osb->blocked_inode_count;
-	while (processed) {
-		OCFS_ASSERT(!list_empty(&osb->blocked_inode_list));
-
-		ip = list_entry(&osb->blocked_inode_list.next,
-				ocfs_inode_private, ip_blocked_list);
-		list_del_init(&ip->ip_blocked_list);
-		osb->blocked_inode_count--;
-		spin_unlock(&osb->vote_task_lock);
-
-		inode = ip->ip_inode;
-		processed--;
-
-		ocfs2_process_blocked_inode(inode);
-
-		iput(inode);
-
-		spin_lock(&osb->vote_task_lock);
-	}
-
-	while (osb->vote_count) {
-		OCFS_ASSERT(!list_empty(&osb->vote_list));
-		work = list_entry(&osb->vote_list.next,
-				  ocfs2_vote_work, w_list);
-		list_del(&work->w_list);
-		osb->vote_count--;
-		spin_unlock(&osb->vote_task_lock);
-
-		ocfs2_process_vote(osb, &work->w_msg);
-		kfree(work);
-
-		spin_lock(&osb->vote_task_lock);
-	}
-	spin_unlock(&osb->vote_task_lock);
-}
-
-static inline int ocfs2_vote_thread_has_work(ocfs_super *osb)
-{
-	if (list_empty(&osb->blocked_inode_list) &&
-	    list_empty(&osb->vote_list) &&
-	    !osb->blocked_super_count)
-		return 0;
-
-	return 1;
-}
-
-static int ocfs2_vote_thread(void *arg)
-{
-	int status = 0;
-	ocfs_super *osb = arg;
-	char proc[16];
-
-	sprintf (proc, "ocfs2vote-%d", osb->osb_id);
-	ocfs_daemonize (proc, strlen(proc), 0);
-
-	spin_lock(&osb->vote_task_lock);
-	osb->vote_task = current;
-	init_completion (&osb->vote_event_complete);
-
-	complete(&osb->vote_event_init);
-
-	while (1) {
-		if (osb->vote_exit) {
-			if (!ocfs2_vote_thread_has_work(osb))
-				break;
-			/* don't want to sleep if we're supposed to quit. */
-			atomic_set(&osb->wake_vote_task, 1);
-		}
-		spin_unlock(&osb->vote_task_lock);
-
-		wait_event_interruptible(osb->vote_event,
-					 atomic_read(&osb->wake_vote_task));
-
-		atomic_set(&osb->wake_vote_task, 0);
-#ifdef OCFS2_VERBOSE_LOCKING_TRACE
-		printk("(%u) vote_thread: awoken\n", current->pid);
-#endif
-		ocfs2_vote_thread_do_work(osb);
-		spin_lock(&osb->vote_task_lock);
-	}
-
-	osb->vote_task = NULL;
-	spin_unlock(&osb->vote_task_lock);
-
-	complete(&osb->vote_event_complete);
-
-	return status;
-}
-
-static ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(ocfs_super *osb,
-						    unsigned int response_id)
-{
-	ocfs2_net_wait_ctxt *w;
-
-	w = kmalloc(sizeof(*w), GFP_KERNEL);
-	if (!w) {
-		LOG_ERROR_STATUS(-ENOMEM);
-		goto bail;
-	}
-	memset(w, 0, sizeof(*w));
-
-	INIT_LIST_HEAD(&w->n_list);
-	init_waitqueue_head(&w->n_event);
-	ocfs_node_map_init(osb, &w->n_node_map);
-	w->n_response_id = response_id;
-bail:
-	return w;
-}
-
-static unsigned int ocfs2_new_response_id(ocfs_super *osb)
-{
-	unsigned int ret;
-
-	spin_lock(&osb->net_response_lock);
-	ret = ++osb->net_response_ids;
-	spin_unlock(&osb->net_response_lock);
-
-	return ret;
-}
-
-static void ocfs2_dequeue_net_wait_ctxt(ocfs_super *osb,
-					ocfs2_net_wait_ctxt *w)
-{
-	spin_lock(&osb->net_response_lock);
-	list_del(&w->n_list);
-	spin_unlock(&osb->net_response_lock);
-}
-
-static void ocfs2_queue_net_wait_ctxt(ocfs_super *osb,
-				      ocfs2_net_wait_ctxt *w)
-{
-	spin_lock(&osb->net_response_lock);
-	list_add_tail(&osb->net_response_list,
-		      &w->n_list);
-	spin_unlock(&osb->net_response_lock);
-}
-
-#define OCFS2_RESPONSE_WAIT_JIFFIES (60 * (HZ >> 1))
-static int ocfs2_wait_on_vote_responses(ocfs_super *osb,
-					ocfs2_net_wait_ctxt *w)
-{
-	int status = 0;
-	signed long timeout = OCFS2_RESPONSE_WAIT_JIFFIES;
-	wait_queue_t wait;
-	init_waitqueue_entry(&wait, current);
-
-	add_wait_queue(&w->n_event, &wait);
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (ocfs_node_map_is_empty(osb, &w->n_node_map))
-			break;
-
-		if (!signal_pending(current)) {
-			timeout = schedule_timeout(timeout);
-			if (!timeout) {
-				status = -ETIMEDOUT;
-				break;
-			}
-			continue;
-		}
-		status = -ERESTARTSYS;
-		break;
-	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(&w->n_event, &wait);
-
-	return status;
-}
-
-static int ocfs2_broadcast_vote(ocfs_super *osb,
-				ocfs2_vote_msg *request,
-				unsigned int response_id)
-{
-	int status, i, remote_err;
-	ocfs2_net_wait_ctxt *w = NULL;
-	struct inode *remote_node;
-
-	w = ocfs2_new_net_wait_ctxt(osb, response_id);
-	if (!w) {
-		status = -ENOMEM;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	/* we're pretty much ready to go at this point, and this fills
-	 * in n_response which we need anyway... */
-	ocfs2_queue_net_wait_ctxt(osb, w);
-
-	i = ocfs_node_map_iterate(osb, &osb->mounted_map, 0);
-	while (i != OCFS_INVALID_NODE_NUM) {
-		if (i != osb->node_num) {
-			ocfs_node_map_set_bit(osb, &w->n_node_map, i);
-
-			remote_node = nm_get_node_by_num(i);
-			if (!remote_node) {
-				status = -EINVAL;
-				goto bail;
-			}
-
-			remote_err = 0;
-			status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
-						  osb->net_key,
-						  request,
-						  sizeof(*request),
-						  remote_node,
-						  &remote_err);
-			iput(remote_node);
-			if (status == -ETIMEDOUT) {
-				printk("ocfs2: remote node %d timed out!\n",
-				       i);
-				status = -EAGAIN;
-				goto bail;
-			}
-			if (remote_err < 0) {
-				status = remote_err;
-				printk("ocfs2: remote error %d on node %d!\n",
-				       remote_err, i);
-				goto bail;
-			}
-			if (status < 0) {
-				LOG_ERROR_STATUS(status);
-				goto bail;
-			}
-		}
-		i = ocfs_node_map_iterate(osb, &osb->mounted_map, i);
-	}
-
-	status = ocfs2_wait_on_vote_responses(osb, w);
-	if (status < 0) {
-		if (status != -EINTR)
-			LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	ocfs2_dequeue_net_wait_ctxt(osb, w);
-	status = w->n_response;
-bail:
-	if (w) {
-		ocfs2_dequeue_net_wait_ctxt(osb, w);
-		kfree(w);
-	}
-
-	return status;
-}
-
-static int ocfs2_do_request_vote(ocfs_super *osb,
-				 u64 blkno,
-				 unsigned int generation,
-				 enum ocfs2_vote_request type)
-{
-	int status;
-	unsigned int response_id;
-	ocfs2_vote_msg *request = NULL;
-	ocfs2_msg_hdr *hdr;
-
-	OCFS_ASSERT(type == OCFS2_VOTE_REQ_DELETE ||
-		    type == OCFS2_VOTE_REQ_UNLINK ||
-		    type == OCFS2_VOTE_REQ_RENAME ||
-		    type == OCFS2_VOTE_REQ_UMOUNT);
-
-	request = kmalloc(sizeof(*request), GFP_KERNEL);
-	if (!request) {
-		status = -ENOMEM;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-	memset(request, 0, sizeof(*request));
-	hdr = &request->v_hdr;
-
-	response_id = ocfs2_new_response_id(osb);
-
-	hdr->h_response_id = htonl(response_id);
-	hdr->h_request = htonl(type);
-	hdr->h_blkno = cpu_to_be64(blkno);
-	hdr->h_generation = htonl(generation);
-	hdr->h_node_num = htonl((unsigned int) osb->node_num);
-
-	status = ocfs2_broadcast_vote(osb, request, response_id);
-	if (status < 0) {
-		if (status != -EINTR)
-			LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-bail:
-	if (request)
-		kfree(request);
-
-	return status;
-}
-
-static int ocfs2_request_vote(struct inode *inode,
-			      enum ocfs2_vote_request type)
-{
-	int status;
-	ocfs_super *osb = OCFS2_SB(inode->i_sb);
-
-	if (ocfs_inode_is_new(inode))
-		return 0;
-
-	status = -EAGAIN;
-	while (status == -EAGAIN) {
-		if (signal_pending(current))
-			return -EINTR;
-
-		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
-					  osb->node_num))
-			return 0;
-
-		status = ocfs2_super_lock(osb, 0);
-		if (status < 0) {
-			if (status != -EINTR)
-				LOG_ERROR_STATUS(status);
-			break;
-		}
-
-		status = ocfs2_do_request_vote(osb, OCFS_I(inode)->ip_blkno,
-					       inode->i_generation, type);
-
-		ocfs2_super_unlock(osb, 0);
-	}
-	return status;
-}
-
-int ocfs2_request_delete_vote(struct inode *inode)
-{
-	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_DELETE);
-}
-
-int ocfs2_request_unlink_vote(struct inode *inode)
-{
-	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_UNLINK);
-}
-
-int ocfs2_request_rename_vote(struct inode *inode)
-{
-	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_RENAME);
-}
-
-int ocfs2_request_mount_vote(ocfs_super *osb)
-{
-	int status;
-
-	status = -EAGAIN;
-	while (status == -EAGAIN) {
-		if (signal_pending(current))
-			return -EINTR;
-
-		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
-					  osb->node_num))
-			return 0;
-
-		status = ocfs2_do_request_vote(osb, 0ULL, 0,
-					       OCFS2_VOTE_REQ_MOUNT);
-	}
-	return status;
-}
-
-int ocfs2_request_umount_vote(ocfs_super *osb)
-{
-	int status;
-
-	status = -EAGAIN;
-	while (status == -EAGAIN) {
-		if (signal_pending(current))
-			return -EINTR;
-
-		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
-					  osb->node_num))
-			return 0;
-
-		status = ocfs2_do_request_vote(osb, 0ULL, 0,
-					       OCFS2_VOTE_REQ_UMOUNT);
-	}
-	return status;
-}
-
-/* TODO: This should eventually be a hash table! */
-static ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(ocfs_super *osb,
-							u32 response_id)
-{
-	struct list_head *p;
-	ocfs2_net_wait_ctxt *w = NULL;
-
-	list_for_each(p, &osb->net_response_list) {
-		w = list_entry(p, ocfs2_net_wait_ctxt, n_list);
-		if (response_id == w->n_response_id)
-			break;
-		w = NULL;
-	}
-
-	return w;
-}
-
-static int ocfs2_handle_response_message(net_msg *msg,
-					 u32 len,
-					 void *data)
-{
-	unsigned int response_id, node_num;
-	int response_status;
-	ocfs_super *osb = data;
-	ocfs2_response_msg *resp;
-	ocfs2_net_wait_ctxt * w;
-
-	resp = (ocfs2_response_msg *) msg->buf;
-
-	response_id = ntohl(resp->r_hdr.h_response_id);
-	node_num = ntohl(resp->r_hdr.h_node_num);
-	response_status = ntohl(resp->r_response);
-
-	printk("recieved response message:\n");
-	printk("h_response_id = %u\n", ntohl(response_id));
-	printk("h_request = %u\n", ntohl(resp->r_hdr.h_request));
-	printk("h_blkno = %llu\n", be64_to_cpu(resp->r_hdr.h_blkno));
-	printk("h_generation = %u\n", ntohl(resp->r_hdr.h_generation));
-	printk("h_node_num = %u\n", node_num);
-	printk("r_response = %d\n", response_status);
-
-	spin_lock(&osb->net_response_lock);
-	w = __ocfs2_find_net_wait_ctxt(osb, response_id);
-	if (!w) {
-		spin_unlock(&osb->net_response_lock);
-		printk("request not found!\n");
-		goto bail;
-	}
-
-	if (response_status && (!w->n_response)) {
-		/* we only really need one negative response so don't
-		 * set it twice. */
-		w->n_response = response_status;
-	}
-
-	ocfs_node_map_clear_bit(osb, &w->n_node_map, node_num);
-	if (ocfs_node_map_is_empty(osb, &w->n_node_map))
-		wake_up_all(&w->n_event);
-	spin_unlock(&osb->net_response_lock);
-
-bail:
-	return 0;
-}
-
-static int ocfs2_handle_vote_message(net_msg *msg,
-					u32 len,
-					void *data)
-{
-	int status;
-	ocfs_super *osb = data;
-	ocfs2_vote_work *work;
-
-	work = kmalloc(sizeof(ocfs2_vote_work), GFP_KERNEL);
-	if (!work) {
-		status = -ENOMEM;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	INIT_LIST_HEAD(&work->w_list);
-	memcpy(&work->w_msg, msg->buf, sizeof(ocfs2_vote_msg));
-
-	printk("scheduling vote request:\n");
-	printk("h_response_id = %u\n", work->w_msg.v_hdr.h_response_id);
-	printk("h_request = %u\n", work->w_msg.v_hdr.h_request);
-	printk("h_blkno = %llu\n", work->w_msg.v_hdr.h_blkno);
-	printk("h_generation = %u\n", work->w_msg.v_hdr.h_generation);
-	printk("h_node_num = %u\n", work->w_msg.v_hdr.h_node_num);
-
-	spin_lock(&osb->vote_task_lock);
-	list_add_tail(&osb->vote_list, &work->w_list);
-	osb->vote_count++;
-	spin_unlock(&osb->vote_task_lock);
-
-	ocfs2_kick_vote_thread(osb);
-
-	status = 0;
-bail:
-	return status;
-}
-
-int ocfs2_register_net_handlers(ocfs_super *osb)
-{
-	int status;
-	int i = MAX_VOL_ID_LENGTH - sizeof(osb->net_key);
-
-	memcpy(&osb->net_key, &osb->uuid[i], sizeof(osb->net_key));
-	osb->net_response_buf = osb->net_vote_buf = NULL;
-	osb->net_response_ids = 0;
-	spin_lock_init(&osb->net_response_lock);
-	INIT_LIST_HEAD(&osb->net_response_list);
-
-	osb->net_response_buf = kmalloc(sizeof(ocfs2_response_msg),
-					GFP_KERNEL);
-	if (!osb->net_response_buf) {
-		status = -ENOMEM;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	osb->net_vote_buf = kmalloc(sizeof(ocfs2_vote_msg),
-				    GFP_KERNEL);
-	if (!osb->net_vote_buf) {
-		status = -ENOMEM;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	status = net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
-				      osb->net_key,
-				      0,
-				      sizeof(ocfs2_response_msg),
-				      ocfs2_handle_response_message,
-				      osb,
-				      osb->net_response_buf);
-	if (status < 0) {
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	status = net_register_handler(OCFS2_MESSAGE_TYPE_VOTE,
-				      osb->net_key,
-				      0,
-				      sizeof(ocfs2_vote_msg),
-				      ocfs2_handle_vote_message,
-				      osb,
-				      osb->net_vote_buf);
-	if (status < 0) {
-		/* TODO: net_unregister here! */
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-bail:
-	if (status < 0) {
-		if (osb->net_response_buf)
-			kfree(osb->net_response_buf);
-		if (osb->net_vote_buf)
-			kfree(osb->net_vote_buf);
-		osb->net_response_buf = osb->net_vote_buf = NULL;
-		/* 0 indicates we never registered anything */
-		osb->net_key = 0;
-	}
-	return status;
-}
-
-void ocfs2_unregister_net_handlers(ocfs_super *osb)
-{
-	if (!osb->net_key)
-		return;
-
-	/* TODO: net_unregister here! */
-	/* TODO: net_unregister here! */
-
-	if (!list_empty(&osb->net_response_list))
-		printk("ocfs2: net response list not empty!\n");
-
-	kfree(osb->net_response_buf);
-	kfree(osb->net_vote_buf);
-}
-

Modified: branches/dlm-glue/src/dlmglue.h
===================================================================
--- branches/dlm-glue/src/dlmglue.h	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/dlmglue.h	2004-11-23 04:01:37 UTC (rev 1665)
@@ -110,18 +110,9 @@
 void ocfs2_super_unlock(ocfs_super *osb,
 			int ex);
 
-static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
-{
-	atomic_set(&osb->wake_vote_task, 1);
-	wake_up(&osb->vote_event);
-}
-int ocfs2_request_delete_vote(struct inode *inode);
-int ocfs2_request_unlink_vote(struct inode *inode);
-int ocfs2_request_rename_vote(struct inode *inode);
-int ocfs2_request_mount_vote(ocfs_super *osb);
-int ocfs2_request_umount_vote(ocfs_super *osb);
-int ocfs2_register_net_handlers(ocfs_super *osb);
-void ocfs2_unregister_net_handlers(ocfs_super *osb);
+/* for the vote thread */
+void ocfs2_process_blocked_inode(struct inode *inode);
+void ocfs2_process_blocked_super(ocfs_super *osb);
 
 static inline void ocfs2_lvb_set_trunc_clusters(struct inode *inode,
 						unsigned int trunc_clusters)
@@ -138,53 +129,4 @@
 	spin_unlock(&lockres->l_lock);
 }
 
-#define OCFS2_MESSAGE_TYPE_VOTE     (0x1)
-#define OCFS2_MESSAGE_TYPE_RESPONSE (0x2)
-typedef struct _ocfs2_msg_hdr
-{
-	u32 h_response_id; /* used to lookup message handle on sending
-			    * node. */
-	u32 h_request;
-	u64 h_blkno;
-	u32 h_generation;
-	u32 h_node_num;    /* node sending this particular message. */
-} ocfs2_msg_hdr;
-
-typedef struct _ocfs2_vote_msg
-{
-	ocfs2_msg_hdr v_hdr;
-	/* may put stuff in here... */
-} ocfs2_vote_msg;
-
-typedef struct _ocfs2_response_msg
-{
-	ocfs2_msg_hdr r_hdr;
-	s32 r_response; /* this maps to '0' or a -value in errno.h */
-} ocfs2_response_msg;
-
-typedef struct _ocfs2_vote_work {
-	struct list_head   w_list;
-	ocfs2_vote_msg w_msg;
-} ocfs2_vote_work;
-
-enum ocfs2_vote_request {
-	OCFS2_VOTE_REQ_INVALID = 0,
-	OCFS2_VOTE_REQ_DELETE,
-	OCFS2_VOTE_REQ_UNLINK,
-	OCFS2_VOTE_REQ_RENAME,
-	OCFS2_VOTE_REQ_MOUNT,
-	OCFS2_VOTE_REQ_UMOUNT
-};
-
-typedef struct _ocfs2_net_wait_ctxt {
-	struct list_head   n_list;
-	u32                n_response_id;
-	wait_queue_head_t  n_event;
-	ocfs_node_map      n_node_map;
-	int                n_response; /* an agreggate response. 0 if
-					* all nodes are go, < 0 on any
-					* negative response from any
-					* node or network error. */
-} ocfs2_net_wait_ctxt;
-
 #endif

Modified: branches/dlm-glue/src/inode.c
===================================================================
--- branches/dlm-glue/src/inode.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/inode.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -49,6 +49,7 @@
 #include "symlink.h"
 #include "sysfile.h"
 #include "util.h"
+#include "vote.h"
 
 #include "ocfs_journal.h"
 #include "buffer_head_io.h"

Modified: branches/dlm-glue/src/journal.c
===================================================================
--- branches/dlm-glue/src/journal.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/journal.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -45,6 +45,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "util.h"
+#include "vote.h"
 #include "sysfile.h"
 
 #include "ocfs_journal.h"

Modified: branches/dlm-glue/src/namei.c
===================================================================
--- branches/dlm-glue/src/namei.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/namei.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -58,6 +58,7 @@
 #include "namei.h"
 #include "suballoc.h"
 #include "util.h"
+#include "vote.h"
 
 #include "ocfs_journal.h"
 #include "buffer_head_io.h"

Modified: branches/dlm-glue/src/ocfs_log.h
===================================================================
--- branches/dlm-glue/src/ocfs_log.h	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/ocfs_log.h	2004-11-23 04:01:37 UTC (rev 1665)
@@ -100,17 +100,17 @@
 #define OCFS_DEBUG_CONTEXT_ALLOC       0x00000001	/* alloc.c    */
 #define OCFS_DEBUG_CONTEXT_DIR         0x00000002	/* dir.c      */
 #define OCFS_DEBUG_CONTEXT_EXTMAP      0x00000004	/* extmap.c   */
-#define OCFS_DEBUG_CONTEXT_SLOTMAP     0x00000008	/*            */
+#define OCFS_DEBUG_CONTEXT_SLOTMAP     0x00000008	/* slotmap.c  */
 #define OCFS_DEBUG_CONTEXT_IOCTL       0x00000010	/* ioctl.c    */
-#define OCFS_DEBUG_CONTEXT_UNUSED2     0x00000020	/*            */
+#define OCFS_DEBUG_CONTEXT_VOTE        0x00000020	/* vote.c     */
 #define OCFS_DEBUG_CONTEXT_PROC        0x00000040	/* proc.c     */
 #define OCFS_DEBUG_CONTEXT_SYMLINK     0x00000080	/* symlink.c  */
 #define OCFS_DEBUG_CONTEXT_BITMAP      0x00000100	/* bitmap.c   */
 #define OCFS_DEBUG_CONTEXT_FILE        0x00000200	/* file.c     */
 #define OCFS_DEBUG_CONTEXT_INODE       0x00000400	/* inode.c    */
 #define OCFS_DEBUG_CONTEXT_JOURNAL     0x00000800	/* journal.c  */
-#define OCFS_DEBUG_CONTEXT_CHAINALLOC  0x00001000	/*            */
-#define OCFS_DEBUG_CONTEXT_LOCALALLOC  0x00002000	/*            */
+#define OCFS_DEBUG_CONTEXT_CHAINALLOC  0x00001000	/* chainalloc */
+#define OCFS_DEBUG_CONTEXT_LOCALALLOC  0x00002000	/* localalloc */
 #define OCFS_DEBUG_CONTEXT_SYSFILE     0x00004000	/* sysfile.c  */
 #define OCFS_DEBUG_CONTEXT_VOLCFG      0x00008000	/* volcfg.c   */
 #define OCFS_DEBUG_CONTEXT_DCACHE      0x00010000	/* dcache.c   */

Modified: branches/dlm-glue/src/super.c
===================================================================
--- branches/dlm-glue/src/super.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/super.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -66,6 +66,7 @@
 #include "sysfile.h"
 #include "util.h"
 #include "ver.h"
+#include "vote.h"
 
 #include "ocfs_journal.h"
 #include "buffer_head_io.h"

Added: branches/dlm-glue/src/vote.c
===================================================================
--- branches/dlm-glue/src/vote.c	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/vote.c	2004-11-23 04:01:37 UTC (rev 1665)
@@ -0,0 +1,871 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * vote.c
+ *
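+ * Sending, receiving and processing of network vote messages
+ * (delete / unlink / rename / mount / umount) between nodes.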
+ *
+ * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ocfs_compat.h"
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/smp_lock.h>
+
+#include <dlmutil.h>
+#include <dlmcommon.h>
+#include <dlmhb.h>
+#include <dlmnm.h>
+#include <dlmtcp.h>
+#include <dlmmod.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "ocfs2.h"
+
+#include "alloc.h"
+#include "dlmglue.h"
+#include "extent_map.h"
+#include "heartbeat.h"
+#include "inode.h"
+#include "slot_map.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "buffer_head_io.h"
+
+#define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_VOTE
+
+#define OCFS2_MESSAGE_TYPE_VOTE     (0x1)
+#define OCFS2_MESSAGE_TYPE_RESPONSE (0x2)
+/* Common header carried by both vote requests and vote responses.
+ * The code in this file fills every field in network byte order
+ * (htonl / cpu_to_be64) and converts back when decoding. */
+typedef struct _ocfs2_msg_hdr
+{
+	u32 h_response_id; /* used to lookup message handle on sending
+			    * node. */
+	u32 h_request;     /* an enum ocfs2_vote_request value */
+	u64 h_blkno;       /* block number of the inode voted on; the
+			    * mount / umount requests pass 0 */
+	u32 h_generation;  /* generation of that inode; the mount /
+			    * umount requests pass 0 */
+	u32 h_node_num;    /* node sending this particular message. */
+} ocfs2_msg_hdr;
+
+/* Wire format of a vote request (OCFS2_MESSAGE_TYPE_VOTE). Currently
+ * this is nothing more than the common header. */
+typedef struct _ocfs2_vote_msg
+{
+	ocfs2_msg_hdr v_hdr;
+	/* may put stuff in here... */
+} ocfs2_vote_msg;
+
+/* Wire format of a vote response (OCFS2_MESSAGE_TYPE_RESPONSE): the
+ * common header followed by the responding node's answer. */
+typedef struct _ocfs2_response_msg
+{
+	ocfs2_msg_hdr r_hdr;
+	s32 r_response; /* this maps to '0' or a -value in errno.h */
+} ocfs2_response_msg;
+
+/* A received vote request, copied out of the network buffer and
+ * queued on osb->vote_list until the vote thread processes and
+ * frees it. */
+typedef struct _ocfs2_vote_work {
+	struct list_head   w_list; /* link on osb->vote_list */
+	ocfs2_vote_msg w_msg;      /* the request, still in network
+				    * byte order */
+} ocfs2_vote_work;
+
+/* Request types carried in ocfs2_msg_hdr.h_request. DELETE, UNLINK
+ * and RENAME are issued against an inode; MOUNT and UMOUNT are
+ * issued against the volume as a whole. */
+enum ocfs2_vote_request {
+	OCFS2_VOTE_REQ_INVALID = 0,
+	OCFS2_VOTE_REQ_DELETE,
+	OCFS2_VOTE_REQ_UNLINK,
+	OCFS2_VOTE_REQ_RENAME,
+	OCFS2_VOTE_REQ_MOUNT,
+	OCFS2_VOTE_REQ_UMOUNT
+};
+
+/* Bookkeeping for one outstanding broadcast vote. The sender queues
+ * one of these on osb->net_response_list and sleeps on n_event; the
+ * response handler looks it up by n_response_id as answers arrive. */
+typedef struct _ocfs2_net_wait_ctxt {
+	struct list_head   n_list;        /* link on
+					   * osb->net_response_list */
+	u32                n_response_id; /* id placed in the request
+					   * header */
+	wait_queue_head_t  n_event;       /* woken once n_node_map is
+					   * empty */
+	ocfs_node_map      n_node_map;    /* nodes we have sent to and
+					   * not yet heard back from */
+	int                n_response; /* an aggregate response. 0 if
+					* all nodes are go, < 0 on any
+					* negative response from any
+					* node or network error. */
+} ocfs2_net_wait_ctxt;
+
+static void ocfs2_vote_thread_do_work(ocfs_super *osb);
+static void ocfs2_process_vote(ocfs_super *osb,
+			       ocfs2_vote_msg *msg);
+static int ocfs2_do_request_vote(ocfs_super *osb,
+				 u64 blkno,
+				 unsigned int generation,
+				 enum ocfs2_vote_request type);
+
+/* Handle a MOUNT vote from node 'node_num': drop that node from our
+ * recovery map and record it in osb->mounted_map. */
+static void ocfs2_process_mount_request(ocfs_super *osb,
+					unsigned int node_num)
+{
+	printk("MOUNT vote from node %u\n", node_num);
+	/* The other node only sends us this message when he has an EX
+	 * on the superblock, so our recovery threads (if having been
+	 * launched) are waiting on it.*/
+	ocfs_recovery_map_clear(osb, node_num);
+	ocfs_node_map_set_bit(osb, &osb->mounted_map, node_num);
+}
+
+/* Handle a UMOUNT vote from node 'node_num': move that node from
+ * osb->mounted_map to osb->umount_map. */
+static void ocfs2_process_umount_request(ocfs_super *osb,
+					 unsigned int node_num)
+{
+	printk("UMOUNT vote from node %u\n", node_num);
+	ocfs_node_map_clear_bit(osb, &osb->mounted_map, node_num);
+	ocfs_node_map_set_bit(osb, &osb->umount_map, node_num);
+}
+
+/*
+ * ocfs2_process_delete_request()
+ *
+ * Decide how to vote on another node's request to delete 'inode'.
+ *
+ * Returns 0 ('yes') after marking the inode deleted locally, or
+ * -EBUSY ('no') if the file is still open here or, for a directory,
+ * if anybody besides the caller holds a reference on the inode.
+ *
+ * Note that i_nlink is forced to zero before the decision is made,
+ * so it is cleared even when the vote turns out to be 'no'.
+ */
+static int ocfs2_process_delete_request(struct inode *inode)
+{
+	int response = -EBUSY;
+
+	LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
+		       "lnk_cnt = %u\n", inode->i_ino, 
+		       inode->i_nlink);
+
+	/* force this as ours may be out of date. */
+	inode->i_nlink = 0;
+
+	spin_lock(&OCFS_I(inode)->ip_lock);
+	/* vote no if the file is still open. */
+	if (OCFS_I(inode)->ip_open_cnt > 0) {
+		LOG_TRACE_PROCESS_VOTE("open count = %u\n", 
+		       OCFS_I(inode)->ip_open_cnt);
+		spin_unlock(&OCFS_I(inode)->ip_lock);
+		goto done;
+	}
+	spin_unlock(&OCFS_I(inode)->ip_lock);
+
+	/* directories are a bit ugly... What if someone is sitting in
+	 * it? We want to make sure the inode is removed completely as
+	 * a result of the iput in process_vote. */
+	if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
+		LOG_TRACE_PROCESS_VOTE("i_count = %u\n", 
+		       atomic_read(&inode->i_count));
+		goto done;
+	}
+
+	/* If we get here, then we're voting 'yes', so commit the
+	 * delete on our side. */
+	response = 0;
+
+	spin_lock(&OCFS_I(inode)->ip_lock);
+	SET_INODE_DELETED(inode);
+	/* We set the SKIP_DELETE flag on the inode so we don't try to
+	 * delete it in delete_inode ourselves. */
+	OCFS_I(inode)->ip_flags |=  OCFS_INODE_SKIP_DELETE;
+	spin_unlock(&OCFS_I(inode)->ip_lock);
+
+	/* get rid of any dentries still pointing at the inode */
+	d_prune_aliases (inode);
+
+	/* TODO: How much of this is really necessary? */
+	/* Flush the mapping's buffers, then throw away the cached
+	 * pages and extent map from offset 0 onwards. */
+	sync_mapping_buffers(inode->i_mapping);
+	ocfs_truncate_inode_pages(inode, 0);
+	ocfs2_extent_map_trunc(inode, 0);
+
+done:
+	return response;
+}
+
+/* Handle an UNLINK or RENAME vote against 'inode': prune its dentry
+ * aliases and, unless 'rename' is set, lower our in-memory link
+ * count (a directory goes straight to zero, anything else loses
+ * one link). */
+static void ocfs2_process_dentry_request(struct inode *inode,
+					 int rename)
+{
+	d_prune_aliases(inode);
+
+	/* a rename leaves the link count alone */
+	if (rename)
+		return;
+
+	if (!S_ISDIR(inode->i_mode)) {
+		inode->i_nlink--;
+		return;
+	}
+
+	inode->i_nlink = 0;
+}
+
+/*
+ * ocfs2_process_vote()
+ *
+ * Act on one vote request received from another node and send that
+ * node our response.
+ *
+ * MOUNT and UMOUNT requests are handled immediately and always
+ * answered with 0. Every other request is against an inode: if the
+ * volume is still in VOLUME_INIT we answer 0 without looking, else
+ * the inode is looked up by block number and the request dispatched
+ * on its type. DELETE answers whatever
+ * ocfs2_process_delete_request() decides, UNLINK and RENAME always
+ * answer 0, and an unknown request type answers -EINVAL.
+ *
+ * 'msg' is in network byte order; it is neither modified nor freed
+ * here.
+ */
+static void ocfs2_process_vote(ocfs_super *osb,
+			       ocfs2_vote_msg *msg)
+{
+	int net_status, vote_response;
+	int rename = 0;
+	unsigned int node_num, generation;
+	u64 blkno;
+	enum ocfs2_vote_request request;
+	struct inode *inode = NULL;
+	struct inode *remote_node;
+	ocfs2_msg_hdr *hdr = &msg->v_hdr;
+	ocfs2_response_msg response;
+
+	/* decode the network mumbo jumbo into local variables. */
+	request = ntohl(hdr->h_request);
+	blkno = be64_to_cpu(hdr->h_blkno);
+	generation = ntohl(hdr->h_generation);
+	node_num = ntohl(hdr->h_node_num);
+
+	printk("ocfs2: processing vote: request = %u, blkno = %llu, "
+	       "generation = %u, node_num = %u\n", request, blkno, generation,
+	       node_num);
+
+	vote_response = 0;
+	switch (request) {
+	case OCFS2_VOTE_REQ_UMOUNT:
+		ocfs2_process_umount_request(osb, node_num);
+		goto respond;
+	case OCFS2_VOTE_REQ_MOUNT:
+		ocfs2_process_mount_request(osb, node_num);
+		goto respond;
+	default:
+		/* avoids a gcc warning */
+		break;
+	}
+
+	/* We cannot process the remaining message types before we're
+	 * fully mounted. It's perfectly safe however to send a 'yes'
+	 * response as we can't possibly have any of the state they're
+	 * asking us to modify yet. */
+	if (atomic_read(&osb->vol_state) == VOLUME_INIT)
+		goto respond;
+
+	/* Default answer for anything we fail to handle below. */
+	vote_response = -EINVAL;
+	/* If we get here, then the request is against an inode. */
+	inode = ocfs_ilookup(osb, blkno);
+	/* NOTE(review): an inode we don't have in memory is answered
+	 * with -EINVAL here; by the same reasoning as the VOLUME_INIT
+	 * case above a 'yes' may be what is wanted -- confirm. */
+	if (!inode)
+		goto respond;
+
+	OCFS_ASSERT(inode->i_generation == generation);
+
+	switch (request) {
+	case OCFS2_VOTE_REQ_DELETE:
+		vote_response = ocfs2_process_delete_request(inode);
+		break;
+	case OCFS2_VOTE_REQ_RENAME:
+		rename = 1;
+		/* fall through */
+	case OCFS2_VOTE_REQ_UNLINK:
+		ocfs2_process_dentry_request(inode, rename);
+		/* The dentry request has no way to fail, so this is a
+		 * 'yes'. Without this the -EINVAL default set above
+		 * would be sent back for every unlink and rename. */
+		vote_response = 0;
+		break;
+	default:
+		/* vote_response stays at -EINVAL */
+		printk("ocfs2_process_vote: node %u, invalid request: %u\n",
+		       node_num, request);
+	}
+
+respond:
+	/* Response structure is small so we just put it on the stack
+	 * and stuff it inline. The fields copied from the request are
+	 * already in network byte order. */
+	memset(&response, 0, sizeof(ocfs2_response_msg));
+	response.r_hdr.h_response_id = hdr->h_response_id;
+	response.r_hdr.h_request = hdr->h_request;
+	response.r_hdr.h_blkno = hdr->h_blkno;
+	response.r_hdr.h_generation = hdr->h_generation;
+	response.r_hdr.h_node_num = htonl(osb->node_num);
+	response.r_response = htonl(vote_response);
+
+	remote_node = nm_get_node_by_num(node_num);
+	if (!remote_node) {
+		LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
+	} else {
+		net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
+					      osb->net_key,
+					      &response,
+					      sizeof(ocfs2_response_msg),
+					      remote_node,
+					      NULL);
+		if (net_status < 0)
+			LOG_ERROR_ARGS("message to node %u fails with error "
+				       "%d!\n", node_num, net_status);
+		iput(remote_node);
+	}
+
+	/* drop the reference ocfs_ilookup() gave us */
+	if (inode)
+		iput(inode);
+}
+
+/*
+ * ocfs2_vote_thread_do_work()
+ *
+ * One pass of the vote thread over its three work sources, in this
+ * order: a blocked superblock lock, the blocked inode list, and the
+ * list of received vote requests.
+ *
+ * osb->vote_task_lock protects the counters and lists; it is dropped
+ * around each call that does the actual processing and retaken
+ * afterwards.
+ */
+static void ocfs2_vote_thread_do_work(ocfs_super *osb)
+{
+	struct inode *inode = NULL;
+	ocfs_inode_private *ip;
+	int processed;
+	ocfs2_vote_work *work;
+
+	spin_lock(&osb->vote_task_lock);
+	if (osb->blocked_super_count) {
+		OCFS_ASSERT(osb->blocked_super_count == 1);
+		osb->blocked_super_count = 0;
+		spin_unlock(&osb->vote_task_lock);
+
+		ocfs2_process_blocked_super(osb);
+
+		spin_lock(&osb->vote_task_lock);
+	}
+
+	/* Only handle as many inodes as were queued when we started;
+	 * entries added while the lock is dropped wait for the next
+	 * pass. */
+	processed = osb->blocked_inode_count;
+	while (processed) {
+		OCFS_ASSERT(!list_empty(&osb->blocked_inode_list));
+
+		/* list_entry() wants the list_head pointer itself, i.e.
+		 * head.next, not the address of the 'next' field. */
+		ip = list_entry(osb->blocked_inode_list.next,
+				ocfs_inode_private, ip_blocked_list);
+		list_del_init(&ip->ip_blocked_list);
+		osb->blocked_inode_count--;
+		spin_unlock(&osb->vote_task_lock);
+
+		inode = ip->ip_inode;
+		processed--;
+
+		ocfs2_process_blocked_inode(inode);
+
+		/* presumably drops the reference taken when the inode
+		 * was queued -- TODO confirm against the queueing code */
+		iput(inode);
+
+		spin_lock(&osb->vote_task_lock);
+	}
+
+	while (osb->vote_count) {
+		OCFS_ASSERT(!list_empty(&osb->vote_list));
+		work = list_entry(osb->vote_list.next,
+				  ocfs2_vote_work, w_list);
+		list_del(&work->w_list);
+		osb->vote_count--;
+		spin_unlock(&osb->vote_task_lock);
+
+		ocfs2_process_vote(osb, &work->w_msg);
+		kfree(work);
+
+		spin_lock(&osb->vote_task_lock);
+	}
+	spin_unlock(&osb->vote_task_lock);
+}
+
+/* Returns 1 if the vote thread has anything left to do (a blocked
+ * super, a queued vote or a blocked inode), 0 otherwise. */
+static inline int ocfs2_vote_thread_has_work(ocfs_super *osb)
+{
+	if (osb->blocked_super_count)
+		return 1;
+
+	if (!list_empty(&osb->vote_list))
+		return 1;
+
+	return !list_empty(&osb->blocked_inode_list);
+}
+
+/*
+ * ocfs2_vote_thread()
+ *
+ * Main function of the per-volume vote thread. 'arg' is the
+ * ocfs_super the thread works for.
+ *
+ * The thread records itself in osb->vote_task, completes
+ * osb->vote_event_init, then loops: sleep on osb->vote_event until
+ * osb->wake_vote_task is set, clear the flag, and run
+ * ocfs2_vote_thread_do_work(). Once osb->vote_exit is set it keeps
+ * going without sleeping until no work is left, then clears
+ * osb->vote_task and completes osb->vote_event_complete.
+ *
+ * Always returns 0.
+ */
+int ocfs2_vote_thread(void *arg)
+{
+	int status = 0;
+	ocfs_super *osb = arg;
+	char proc[16];
+
+	/* Bounded: "ocfs2vote-" leaves room for only five digits. */
+	snprintf(proc, sizeof(proc), "ocfs2vote-%d", osb->osb_id);
+	ocfs_daemonize (proc, strlen(proc), 0);
+
+	spin_lock(&osb->vote_task_lock);
+	osb->vote_task = current;
+	init_completion (&osb->vote_event_complete);
+
+	complete(&osb->vote_event_init);
+
+	/* vote_task_lock is held at the top of every iteration */
+	while (1) {
+		if (osb->vote_exit) {
+			if (!ocfs2_vote_thread_has_work(osb))
+				break;
+			/* don't want to sleep if we're supposed to quit. */
+			atomic_set(&osb->wake_vote_task, 1);
+		}
+		spin_unlock(&osb->vote_task_lock);
+
+		wait_event_interruptible(osb->vote_event,
+					 atomic_read(&osb->wake_vote_task));
+
+		atomic_set(&osb->wake_vote_task, 0);
+#ifdef OCFS2_VERBOSE_LOCKING_TRACE
+		printk("(%u) vote_thread: awoken\n", current->pid);
+#endif
+		ocfs2_vote_thread_do_work(osb);
+		spin_lock(&osb->vote_task_lock);
+	}
+
+	osb->vote_task = NULL;
+	spin_unlock(&osb->vote_task_lock);
+
+	complete(&osb->vote_event_complete);
+
+	return status;
+}
+
+/* Allocate and initialize a wait context for the vote identified by
+ * 'response_id'. The context is zeroed, so n_response starts out as
+ * 0. Returns NULL (after logging -ENOMEM) if the allocation fails;
+ * the caller owns the returned memory. */
+static ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(ocfs_super *osb,
+						    unsigned int response_id)
+{
+	ocfs2_net_wait_ctxt *ctxt;
+
+	ctxt = kmalloc(sizeof(ocfs2_net_wait_ctxt), GFP_KERNEL);
+	if (ctxt == NULL) {
+		LOG_ERROR_STATUS(-ENOMEM);
+		return NULL;
+	}
+
+	memset(ctxt, 0, sizeof(ocfs2_net_wait_ctxt));
+	ctxt->n_response_id = response_id;
+	INIT_LIST_HEAD(&ctxt->n_list);
+	init_waitqueue_head(&ctxt->n_event);
+	ocfs_node_map_init(osb, &ctxt->n_node_map);
+
+	return ctxt;
+}
+
+/* Hand out the next response id for this volume by incrementing
+ * osb->net_response_ids under osb->net_response_lock. The counter
+ * is pre-incremented, so the first id returned after it has been
+ * reset to 0 is 1. */
+static unsigned int ocfs2_new_response_id(ocfs_super *osb)
+{
+	unsigned int ret;
+
+	spin_lock(&osb->net_response_lock);
+	ret = ++osb->net_response_ids;
+	spin_unlock(&osb->net_response_lock);
+
+	return ret;
+}
+
+/* Take 'w' off osb->net_response_list so that the response handler
+ * can no longer find it. list_del_init() leaves w->n_list pointing
+ * at itself, which makes dequeueing a context that is already off
+ * the list harmless. */
+static void ocfs2_dequeue_net_wait_ctxt(ocfs_super *osb,
+					ocfs2_net_wait_ctxt *w)
+{
+	spin_lock(&osb->net_response_lock);
+	list_del_init(&w->n_list);
+	spin_unlock(&osb->net_response_lock);
+}
+
+/* Append 'w' to osb->net_response_list so that incoming responses
+ * can be matched against it. */
+static void ocfs2_queue_net_wait_ctxt(ocfs_super *osb,
+				      ocfs2_net_wait_ctxt *w)
+{
+	spin_lock(&osb->net_response_lock);
+	/* list_add_tail(new, head): the new entry comes first. */
+	list_add_tail(&w->n_list,
+		      &osb->net_response_list);
+	spin_unlock(&osb->net_response_lock);
+}
+
+/* Total time to wait for all responses: 60 * (HZ / 2) jiffies, i.e.
+ * 30 seconds. */
+#define OCFS2_RESPONSE_WAIT_JIFFIES (60 * (HZ >> 1))
+/*
+ * ocfs2_wait_on_vote_responses()
+ *
+ * Sleep interruptibly on w->n_event until w->n_node_map is empty.
+ *
+ * Returns 0 once the map is empty, -ETIMEDOUT if
+ * OCFS2_RESPONSE_WAIT_JIFFIES elapse first, or -ERESTARTSYS if a
+ * signal is pending. Note that the signal case is -ERESTARTSYS, not
+ * -EINTR.
+ */
+static int ocfs2_wait_on_vote_responses(ocfs_super *osb,
+					ocfs2_net_wait_ctxt *w)
+{
+	int status = 0;
+	signed long timeout = OCFS2_RESPONSE_WAIT_JIFFIES;
+	wait_queue_t wait;
+	init_waitqueue_entry(&wait, current);
+
+	add_wait_queue(&w->n_event, &wait);
+	while (1) {
+		/* Mark ourselves sleeping before testing the map so a
+		 * wake up arriving between the test and the schedule
+		 * is not lost. */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (ocfs_node_map_is_empty(osb, &w->n_node_map))
+			break;
+
+		if (!signal_pending(current)) {
+			/* schedule_timeout() hands back the jiffies
+			 * still remaining, so the budget is shared
+			 * across wake ups. */
+			timeout = schedule_timeout(timeout);
+			if (!timeout) {
+				status = -ETIMEDOUT;
+				break;
+			}
+			continue;
+		}
+		status = -ERESTARTSYS;
+		break;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&w->n_event, &wait);
+
+	return status;
+}
+
+/*
+ * ocfs2_broadcast_vote()
+ *
+ * Send 'request' to every node in osb->mounted_map other than
+ * ourselves, then wait until each of them has responded.
+ *
+ * 'request' must already be in network byte order and carry
+ * 'response_id' in its header.
+ *
+ * Returns the aggregate response (0 if every node said yes, the
+ * first negative answer otherwise), or a negative errno of our own:
+ * -ENOMEM, -EINVAL if a node could not be looked up, -EAGAIN if a
+ * send timed out, any other send error, or whatever
+ * ocfs2_wait_on_vote_responses() failed with.
+ */
+static int ocfs2_broadcast_vote(ocfs_super *osb,
+				ocfs2_vote_msg *request,
+				unsigned int response_id)
+{
+	int status, i, remote_err;
+	ocfs2_net_wait_ctxt *w = NULL;
+	struct inode *remote_node;
+
+	w = ocfs2_new_net_wait_ctxt(osb, response_id);
+	if (!w) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* Queue the context before anything is sent so that no
+	 * response can arrive without finding it. */
+	ocfs2_queue_net_wait_ctxt(osb, w);
+
+	i = ocfs_node_map_iterate(osb, &osb->mounted_map, 0);
+	while (i != OCFS_INVALID_NODE_NUM) {
+		if (i != osb->node_num) {
+			/* we now expect an answer from this node */
+			ocfs_node_map_set_bit(osb, &w->n_node_map, i);
+
+			remote_node = nm_get_node_by_num(i);
+			if (!remote_node) {
+				status = -EINVAL;
+				goto bail;
+			}
+
+			remote_err = 0;
+			status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
+						  osb->net_key,
+						  request,
+						  sizeof(*request),
+						  remote_node,
+						  &remote_err);
+			iput(remote_node);
+			if (status == -ETIMEDOUT) {
+				printk("ocfs2: remote node %d timed out!\n",
+				       i);
+				/* callers retry on -EAGAIN */
+				status = -EAGAIN;
+				goto bail;
+			}
+			if (remote_err < 0) {
+				status = remote_err;
+				printk("ocfs2: remote error %d on node %d!\n",
+				       remote_err, i);
+				goto bail;
+			}
+			if (status < 0) {
+				LOG_ERROR_STATUS(status);
+				goto bail;
+			}
+		}
+		i = ocfs_node_map_iterate(osb, &osb->mounted_map, i);
+	}
+
+	status = ocfs2_wait_on_vote_responses(osb, w);
+	if (status < 0) {
+		/* a pending signal is reported as -ERESTARTSYS; don't
+		 * log that as an error */
+		if (status != -EINTR && status != -ERESTARTSYS)
+			LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* Every node has answered. The context is dequeued exactly
+	 * once, in the common exit path below. */
+	status = w->n_response;
+bail:
+	if (w) {
+		ocfs2_dequeue_net_wait_ctxt(osb, w);
+		kfree(w);
+	}
+
+	return status;
+}
+
+/*
+ * ocfs2_do_request_vote()
+ *
+ * Build a vote request of the given 'type' and broadcast it.
+ *
+ * 'blkno' and 'generation' identify the inode being voted on; the
+ * mount and umount requests pass 0 for both.
+ *
+ * Returns what ocfs2_broadcast_vote() returns, or -ENOMEM if the
+ * request could not be allocated.
+ */
+static int ocfs2_do_request_vote(ocfs_super *osb,
+				 u64 blkno,
+				 unsigned int generation,
+				 enum ocfs2_vote_request type)
+{
+	int status;
+	unsigned int response_id;
+	ocfs2_vote_msg *request = NULL;
+	ocfs2_msg_hdr *hdr;
+
+	/* Every real request type is legal here, including MOUNT,
+	 * which ocfs2_request_mount_vote() sends through us. */
+	OCFS_ASSERT(type == OCFS2_VOTE_REQ_DELETE ||
+		    type == OCFS2_VOTE_REQ_UNLINK ||
+		    type == OCFS2_VOTE_REQ_RENAME ||
+		    type == OCFS2_VOTE_REQ_MOUNT ||
+		    type == OCFS2_VOTE_REQ_UMOUNT);
+
+	request = kmalloc(sizeof(*request), GFP_KERNEL);
+	if (!request) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memset(request, 0, sizeof(*request));
+	hdr = &request->v_hdr;
+
+	response_id = ocfs2_new_response_id(osb);
+
+	/* everything goes on the wire in network byte order */
+	hdr->h_response_id = htonl(response_id);
+	hdr->h_request = htonl(type);
+	hdr->h_blkno = cpu_to_be64(blkno);
+	hdr->h_generation = htonl(generation);
+	hdr->h_node_num = htonl((unsigned int) osb->node_num);
+
+	status = ocfs2_broadcast_vote(osb, request, response_id);
+	if (status < 0) {
+		/* an interrupted wait comes back as -ERESTARTSYS;
+		 * don't log that as an error */
+		if (status != -EINTR && status != -ERESTARTSYS)
+			LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+bail:
+	if (request)
+		kfree(request);
+
+	return status;
+}
+
+/*
+ * ocfs2_request_vote()
+ *
+ * Ask all other mounted nodes to vote on a 'type' request against
+ * 'inode', retrying for as long as the vote comes back -EAGAIN.
+ *
+ * Returns 0 straight away for a new inode or when we are the only
+ * node in the mounted map, -EINTR if a signal is pending before an
+ * attempt, the super lock error if that lock cannot be taken, and
+ * otherwise the result of the vote.
+ */
+static int ocfs2_request_vote(struct inode *inode,
+			      enum ocfs2_vote_request type)
+{
+	ocfs_super *osb = OCFS2_SB(inode->i_sb);
+	int status;
+
+	if (ocfs_inode_is_new(inode))
+		return 0;
+
+	do {
+		if (signal_pending(current))
+			return -EINTR;
+
+		/* nobody else to ask */
+		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
+					  osb->node_num))
+			return 0;
+
+		status = ocfs2_super_lock(osb, 0);
+		if (status < 0) {
+			if (status != -EINTR)
+				LOG_ERROR_STATUS(status);
+			return status;
+		}
+
+		/* the super lock is held across the whole vote */
+		status = ocfs2_do_request_vote(osb, OCFS_I(inode)->ip_blkno,
+					       inode->i_generation, type);
+
+		ocfs2_super_unlock(osb, 0);
+	} while (status == -EAGAIN);
+
+	return status;
+}
+
+/* Ask the other mounted nodes to vote on deleting 'inode'. Returns
+ * the result of ocfs2_request_vote(). */
+int ocfs2_request_delete_vote(struct inode *inode)
+{
+	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_DELETE);
+}
+
+/* Ask the other mounted nodes to vote on unlinking 'inode'. Returns
+ * the result of ocfs2_request_vote(). */
+int ocfs2_request_unlink_vote(struct inode *inode)
+{
+	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_UNLINK);
+}
+
+/* Ask the other mounted nodes to vote on renaming 'inode'. Returns
+ * the result of ocfs2_request_vote(). */
+int ocfs2_request_rename_vote(struct inode *inode)
+{
+	return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_RENAME);
+}
+
+/* Broadcast a MOUNT vote to the other mounted nodes, retrying while
+ * the vote comes back -EAGAIN. Returns 0 at once if we are the only
+ * node in the mounted map, -EINTR if a signal is pending before an
+ * attempt, otherwise the result of the vote. Unlike the inode
+ * votes, the super lock is not taken here. */
+int ocfs2_request_mount_vote(ocfs_super *osb)
+{
+	int status;
+
+	do {
+		if (signal_pending(current))
+			return -EINTR;
+
+		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
+					  osb->node_num))
+			return 0;
+
+		status = ocfs2_do_request_vote(osb, 0ULL, 0,
+					       OCFS2_VOTE_REQ_MOUNT);
+	} while (status == -EAGAIN);
+
+	return status;
+}
+
+/* Broadcast a UMOUNT vote to the other mounted nodes, retrying while
+ * the vote comes back -EAGAIN. Returns 0 at once if we are the only
+ * node in the mounted map, -EINTR if a signal is pending before an
+ * attempt, otherwise the result of the vote. Unlike the inode
+ * votes, the super lock is not taken here. */
+int ocfs2_request_umount_vote(ocfs_super *osb)
+{
+	int status;
+
+	do {
+		if (signal_pending(current))
+			return -EINTR;
+
+		if (ocfs_node_map_is_only(osb, &osb->mounted_map,
+					  osb->node_num))
+			return 0;
+
+		status = ocfs2_do_request_vote(osb, 0ULL, 0,
+					       OCFS2_VOTE_REQ_UMOUNT);
+	} while (status == -EAGAIN);
+
+	return status;
+}
+
+/* Linear search of osb->net_response_list for the wait context with
+ * the given 'response_id'; returns it, or NULL if there is none. No
+ * locking is done here -- the caller in this file holds
+ * osb->net_response_lock around the call.
+ *
+ * TODO: This should eventually be a hash table! */
+static ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(ocfs_super *osb,
+							u32 response_id)
+{
+	struct list_head *pos;
+	ocfs2_net_wait_ctxt *ctxt;
+
+	list_for_each(pos, &osb->net_response_list) {
+		ctxt = list_entry(pos, ocfs2_net_wait_ctxt, n_list);
+		if (ctxt->n_response_id == response_id)
+			return ctxt;
+	}
+
+	return NULL;
+}
+
+/*
+ * ocfs2_handle_response_message()
+ *
+ * Network handler for OCFS2_MESSAGE_TYPE_RESPONSE. 'data' is the
+ * ocfs_super given at registration time and msg->buf holds an
+ * ocfs2_response_msg in network byte order.
+ *
+ * Finds the wait context matching the response id, records the
+ * first non-zero answer in it, clears the responding node from the
+ * context's node map and wakes the waiter once that map is empty. A
+ * response with no matching context is reported and dropped.
+ *
+ * Always returns 0. 'len' is not looked at.
+ */
+static int ocfs2_handle_response_message(net_msg *msg,
+					 u32 len,
+					 void *data)
+{
+	unsigned int response_id, node_num;
+	int response_status;
+	ocfs_super *osb = data;
+	ocfs2_response_msg *resp;
+	ocfs2_net_wait_ctxt * w;
+
+	resp = (ocfs2_response_msg *) msg->buf;
+
+	response_id = ntohl(resp->r_hdr.h_response_id);
+	node_num = ntohl(resp->r_hdr.h_node_num);
+	response_status = ntohl(resp->r_response);
+
+	printk("recieved response message:\n");
+	/* response_id is already in host order; don't swap it again */
+	printk("h_response_id = %u\n", response_id);
+	printk("h_request = %u\n", ntohl(resp->r_hdr.h_request));
+	printk("h_blkno = %llu\n", be64_to_cpu(resp->r_hdr.h_blkno));
+	printk("h_generation = %u\n", ntohl(resp->r_hdr.h_generation));
+	printk("h_node_num = %u\n", node_num);
+	printk("r_response = %d\n", response_status);
+
+	spin_lock(&osb->net_response_lock);
+	w = __ocfs2_find_net_wait_ctxt(osb, response_id);
+	if (!w) {
+		spin_unlock(&osb->net_response_lock);
+		printk("request not found!\n");
+		goto bail;
+	}
+
+	if (response_status && (!w->n_response)) {
+		/* we only really need one negative response so don't
+		 * set it twice. */
+		w->n_response = response_status;
+	}
+
+	ocfs_node_map_clear_bit(osb, &w->n_node_map, node_num);
+	if (ocfs_node_map_is_empty(osb, &w->n_node_map))
+		wake_up_all(&w->n_event);
+	spin_unlock(&osb->net_response_lock);
+
+bail:
+	return 0;
+}
+
+/*
+ * ocfs2_handle_vote_message()
+ *
+ * Network handler for OCFS2_MESSAGE_TYPE_VOTE. 'data' is the
+ * ocfs_super given at registration time and msg->buf holds an
+ * ocfs2_vote_msg in network byte order.
+ *
+ * The request is not processed here: it is copied into a freshly
+ * allocated ocfs2_vote_work, appended to osb->vote_list, and the
+ * vote thread is kicked to deal with it.
+ *
+ * Returns 0, or -ENOMEM if the work item could not be allocated.
+ * 'len' is not looked at.
+ */
+static int ocfs2_handle_vote_message(net_msg *msg,
+					u32 len,
+					void *data)
+{
+	int status;
+	ocfs_super *osb = data;
+	ocfs2_vote_work *work;
+
+	/* NOTE(review): GFP_KERNEL may sleep; confirm the network
+	 * layer calls its handlers from a context where that is
+	 * allowed. */
+	work = kmalloc(sizeof(ocfs2_vote_work), GFP_KERNEL);
+	if (!work) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	INIT_LIST_HEAD(&work->w_list);
+	memcpy(&work->w_msg, msg->buf, sizeof(ocfs2_vote_msg));
+
+	/* the stored copy stays in network byte order; convert only
+	 * for printing */
+	printk("scheduling vote request:\n");
+	printk("h_response_id = %u\n",
+	       ntohl(work->w_msg.v_hdr.h_response_id));
+	printk("h_request = %u\n", ntohl(work->w_msg.v_hdr.h_request));
+	printk("h_blkno = %llu\n", be64_to_cpu(work->w_msg.v_hdr.h_blkno));
+	printk("h_generation = %u\n",
+	       ntohl(work->w_msg.v_hdr.h_generation));
+	printk("h_node_num = %u\n", ntohl(work->w_msg.v_hdr.h_node_num));
+
+	spin_lock(&osb->vote_task_lock);
+	/* list_add_tail(new, head): the new entry comes first. */
+	list_add_tail(&work->w_list, &osb->vote_list);
+	osb->vote_count++;
+	spin_unlock(&osb->vote_task_lock);
+
+	ocfs2_kick_vote_thread(osb);
+
+	status = 0;
+bail:
+	return status;
+}
+
+/*
+ * ocfs2_register_net_handlers()
+ *
+ * Set up the per-volume network state (net key, response id counter,
+ * response lock and list, receive buffers) and register the handlers
+ * for response and vote messages.
+ *
+ * Returns 0 on success. On failure returns -ENOMEM or the error from
+ * net_register_handler(), with both buffers freed and osb->net_key
+ * reset to 0.
+ */
+int ocfs2_register_net_handlers(ocfs_super *osb)
+{
+	int status;
+	/* offset of the last sizeof(net_key) bytes of the uuid */
+	int i = MAX_VOL_ID_LENGTH - sizeof(osb->net_key);
+
+	memcpy(&osb->net_key, &osb->uuid[i], sizeof(osb->net_key));
+	osb->net_response_buf = osb->net_vote_buf = NULL;
+	osb->net_response_ids = 0;
+	spin_lock_init(&osb->net_response_lock);
+	INIT_LIST_HEAD(&osb->net_response_list);
+
+	/* one buffer per message type, handed to the net layer below */
+	osb->net_response_buf = kmalloc(sizeof(ocfs2_response_msg),
+					GFP_KERNEL);
+	if (!osb->net_response_buf) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	osb->net_vote_buf = kmalloc(sizeof(ocfs2_vote_msg),
+				    GFP_KERNEL);
+	if (!osb->net_vote_buf) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
+				      osb->net_key,
+				      0,
+				      sizeof(ocfs2_response_msg),
+				      ocfs2_handle_response_message,
+				      osb,
+				      osb->net_response_buf);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = net_register_handler(OCFS2_MESSAGE_TYPE_VOTE,
+				      osb->net_key,
+				      0,
+				      sizeof(ocfs2_vote_msg),
+				      ocfs2_handle_vote_message,
+				      osb,
+				      osb->net_vote_buf);
+	if (status < 0) {
+		/* TODO: net_unregister here! */
+		/* NOTE(review): until that is done the response handler
+		 * registered above is left in place while its buffer
+		 * is freed below. */
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+bail:
+	if (status < 0) {
+		if (osb->net_response_buf)
+			kfree(osb->net_response_buf);
+		if (osb->net_vote_buf)
+			kfree(osb->net_vote_buf);
+		osb->net_response_buf = osb->net_vote_buf = NULL;
+		/* 0 indicates we never registered anything */
+		osb->net_key = 0;
+	}
+	return status;
+}
+
+/*
+ * ocfs2_unregister_net_handlers()
+ *
+ * Release the network receive buffers allocated by
+ * ocfs2_register_net_handlers(). Does nothing if osb->net_key is 0,
+ * i.e. nothing was ever registered. Complains if wait contexts are
+ * still queued on the response list.
+ *
+ * The handlers themselves are not unregistered yet (see the TODOs).
+ */
+void ocfs2_unregister_net_handlers(ocfs_super *osb)
+{
+	if (!osb->net_key)
+		return;
+
+	/* TODO: net_unregister the response handler here! */
+	/* TODO: net_unregister the vote handler here! */
+
+	if (!list_empty(&osb->net_response_list))
+		printk("ocfs2: net response list not empty!\n");
+
+	kfree(osb->net_response_buf);
+	kfree(osb->net_vote_buf);
+	/* don't leave pointers to freed memory behind in the osb */
+	osb->net_response_buf = osb->net_vote_buf = NULL;
+}

Added: branches/dlm-glue/src/vote.h
===================================================================
--- branches/dlm-glue/src/vote.h	2004-11-23 03:40:18 UTC (rev 1664)
+++ branches/dlm-glue/src/vote.h	2004-11-23 04:01:37 UTC (rev 1665)
@@ -0,0 +1,45 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * vote.h
+ *
+ * Public interface to the network vote code in vote.c.
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#ifndef VOTE_H
+#define VOTE_H
+
+int ocfs2_vote_thread(void *arg);
+/* Tell the vote thread there is work for it: set the wake flag it
+ * sleeps on, then wake up anybody waiting on osb->vote_event. The
+ * flag is set first so the woken thread sees it. */
+static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
+{
+	atomic_set(&osb->wake_vote_task, 1);
+	wake_up(&osb->vote_event);
+}
+
+int ocfs2_request_delete_vote(struct inode *inode);
+int ocfs2_request_unlink_vote(struct inode *inode);
+int ocfs2_request_rename_vote(struct inode *inode);
+int ocfs2_request_mount_vote(ocfs_super *osb);
+int ocfs2_request_umount_vote(ocfs_super *osb);
+int ocfs2_register_net_handlers(ocfs_super *osb);
+void ocfs2_unregister_net_handlers(ocfs_super *osb);
+
+#endif



More information about the Ocfs2-commits mailing list