[Ocfs2-commits] zab commits r1938 - in trunk/fs/ocfs2: . cluster dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Fri Mar 4 14:26:32 CST 2005


Author: zab
Signed-off-by: mfasheh
Signed-off-by: khackel
Date: 2005-03-04 14:26:31 -0600 (Fri, 04 Mar 2005)
New Revision: 1938

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/cluster/tcp.c
   trunk/fs/ocfs2/cluster/util.c
   trunk/fs/ocfs2/cluster/util.h
   trunk/fs/ocfs2/dlm/dlmmod.c
   trunk/fs/ocfs2/dlm/dlmmod.h
   trunk/fs/ocfs2/dlm/dlmthread.c
   trunk/fs/ocfs2/dlm/util.c
   trunk/fs/ocfs2/dlm/util.h
   trunk/fs/ocfs2/dlmglue.c
   trunk/fs/ocfs2/journal.c
   trunk/fs/ocfs2/ocfs.h
   trunk/fs/ocfs2/super.c
   trunk/fs/ocfs2/util.c
   trunk/fs/ocfs2/util.h
   trunk/fs/ocfs2/vote.c
   trunk/fs/ocfs2/vote.h
Log:
Clean up our thread management.  Generally:
o use kthread routines and remove util_daemonize, ocfs_daemonize, 
  util_thread_info, etc
o don't have the threads exit on signals (made shutdown hard)
o patch up some races by using wait_event on level conditions instead of 
  edge-triggered atomic_t guys

Specifically:

net receive thread:
o setup rx socket before launching thread, don't have it be a global anymore
o get rid of wait_queue management that duplicated what wait_event was doing
o wake up receive task explicitly instead of going through the rx socket
  wait queue

recovery:
o use the thread's task_struct instead of recovery_launched 
o get rid of open-coded ocfs2_commit_thread_sleep, use wait_event_i_t

vote:
o move to sequence numbers instead of a boolean woke to catch races

heartbeat:
o alloc page in launcher, instead of inside thread 
o have the thread exit when hb_complete_thread() asks it to

commit:
o kthread makes ->commit struct redundant
o rely more on kthread_should_stop() rather than _IN_SHUTDOWN

dlm:
o kthread makes ->thread struct redundant
o decide to wake by testing the dirty list, not ->woken

Signed-off-by: mfasheh
Signed-off-by: khackel


Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -48,7 +48,6 @@
 #include <linux/socket.h>
 #include <linux/inet.h>
 #include <linux/in.h>
-#include <linux/module.h>
 
 #include <linux/linkage.h>
 #include <linux/time.h>
@@ -61,6 +60,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/bitops.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -104,8 +104,6 @@
 static LIST_HEAD(hb_net_groups);
 static LIST_HEAD(hb_disk_groups);
 static struct task_struct *hb_task = NULL;
-static struct completion hb_complete;
-static int hb_pid = -1;
 
 static struct hb_callback {
 	struct list_head list;
@@ -369,65 +367,64 @@
 	return 0;
 }
 
-
 static int hb_thread(void *data)
 {
-	void *page;
-	
-	page = (void *) __get_free_page(GFP_KERNEL);
-	if (!page)
-		return -ENOMEM;
+	void *page = data;
 
-	util_daemonize ("hb_thread", strlen("hb_thread"), 1);
-	hb_task = current;
-
-	while (1) {
+	hbprintk("hb thread running\n");
+	
+	while (!kthread_should_stop()) {
 		hb_do_disk_heartbeat(page);
-		/* when we can really tear down this can wait on a wait
-		 * queue */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(msecs_to_jiffies(HB_THREAD_MS));
 	}
 
-	flush_scheduled_work();
-	complete (&hb_complete);
-	hbprintk("quitting hb thread!!!!!!\n");
+	hbprintk("hb thread exiting\n");
+	free_page((unsigned long)page);
 	return 0;
 }
 
 /* Launch the hb thread for the mounted volume */
 static int hb_launch_thread(void)
 {
-	hb_pid = -1;
-	hb_task = NULL;
-	init_completion (&hb_complete);
+	void *page;
+	int ret;
 
+	page = (void *)__get_free_page(GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	hbprintk("starting hb thread...\n");
-	hb_pid = kernel_thread (hb_thread, NULL, 
-				CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-	if (hb_pid < 0) {
-		hbprintk("unable to launch hb thread, error=%d", hb_pid);
-		return -EINVAL;
+	hb_task = kthread_run(hb_thread, page, "hb_thread");
+	if (IS_ERR(hb_task)) {
+		hb_task = NULL;
+		hbprintk("unable to launch hb thread, error=%ld",
+			 PTR_ERR(hb_task));
+		ret = -EINVAL;
+		goto out;
 	}
-	hbprintk("hb thread running...\n");
-	return 0;
+
+	/* hb_thread is responsible for freeing the page if it runs */
+	page = NULL;
+	ret = 0;
+
+out:
+	if (page)
+		free_page((unsigned long)page);
+	return ret;
 }
 
 static void hb_complete_thread(void)
 {
-	hbprintk ("waiting for hb thread to exit....");
-	send_sig (SIGINT, hb_task, 0);
-	wait_for_completion (&hb_complete);
-	hbprintk ("hb thread exited\n");
-	hb_task = NULL;
+	if (hb_task) {
+		hbprintk("waiting for hb thread to exit\n");
+		kthread_stop(hb_task);
+		hb_task = NULL;
+	}
 }
 
-
-
-
-
-
-
 static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits, 
 				 u32 blocks, u64 start)
 {

Modified: trunk/fs/ocfs2/cluster/tcp.c
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/cluster/tcp.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -68,7 +68,6 @@
  * 	- find explicit stack call to drain rx queue
  * 	- add trivial version trading message at the start of a conn
  * 	- go nuts adding static
- * 	- move recv_sock into recv_thread
  * 	- nsc waiting is buggy, should be on socket.. wake w/err if socket dies
  * 	- compare socks in attach_sock so both size don't close
  * 	- implement net_remove_handlers
@@ -94,6 +93,7 @@
 #include <linux/blkdev.h>
 #include <linux/proc_fs.h>
 #include <linux/file.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -145,7 +145,6 @@
 #define sk_state_change		state_change
 #endif
 
-struct socket *recv_sock = NULL;
 static u16 ip_version, ip_port;
 static struct inode *net_inode = NULL;
 static u8 net_node_num;
@@ -160,9 +159,7 @@
 static spinlock_t net_active_lock = SPIN_LOCK_UNLOCKED;
 static LIST_HEAD(net_active_list);
 
-static int net_recv_pid = -1;
 static struct task_struct *net_recv_task = NULL;
-static struct completion net_recv_complete;
 
 static inline void net_abort_status_return(net_status_ctxt *nsc)
 {
@@ -180,11 +177,10 @@
 static void __exit net_driver_exit (void);
 static int net_add_handler(net_msg_handler *nmh);
 static void net_remove_handlers(void);
-static int net_init_tcp_recv_sock(void);
+static struct socket *net_init_tcp_recv_sock(void);
 static int net_receive_thread(void *data);
 static int net_receive(void);
-static void net_try_accept(void);
-static void net_release_tcp_sock(void);
+static void net_try_accept(struct socket *sock);
 static int net_process_message(struct socket *sock, net_msg *hdr);
 static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
 
@@ -358,32 +354,47 @@
 
 static int net_startup(void)
 {
-	net_recv_pid = -1;
-	net_recv_task = NULL;
-	init_completion (&net_recv_complete);
+	struct socket *sock;
+	int ret = 0;
 
+	/* if the thread was setting up the rx socket we'd like to have it
+	 * communicate errors back to us here.  us setting up the socket
+	 * and passing it to the thread is easier */
+	sock = net_init_tcp_recv_sock();
+	if (IS_ERR(sock)) {
+		ret = PTR_ERR(sock);
+		goto out;
+	}
+
 	netprintk0("starting net receive thread...\n");
-	net_recv_pid = kernel_thread (net_receive_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-	if (net_recv_pid < 0) {
-		netprintk("unable to launch net receive thread, error=%d\n",
-			  net_recv_pid);
-		net_shutdown();
-		return -EINVAL;
+
+	net_recv_task = kthread_run(net_receive_thread, sock, "netrecv");
+	if (IS_ERR(net_recv_task)) {
+		ret = PTR_ERR(net_recv_task);
+		net_recv_task = NULL;
+		netprintk("unable to launch net receive thread, error=%ld\n",
+			  (long)ret);
+		goto out;
 	}
 
-	netprintk0("net thread running...\n");
+	/* once the thread is running it has ownership of the sock */
+	sock = NULL;
+
+out:
+	if (sock)
+		sock_release(sock);
 	return 0;
 }
 
 static void net_shutdown(void)
 {
-	netprintk ("waiting for net thread to exit....\n");
-	send_sig (SIGINT, net_recv_task, 0);
-	wait_for_completion (&net_recv_complete);
-	netprintk ("net thread exited\n");
+	if (net_recv_task) {
+		netprintk("waiting for net thread to exit....\n");
+		kthread_stop(net_recv_task);
+	}
 }
 
-static int net_rx_should_wake(void)
+static int net_rx_should_wake(struct socket *sock)
 {
 	int empty;
 
@@ -391,42 +402,26 @@
 	empty = list_empty(&net_active_list);
 	spin_unlock_bh(&net_active_lock);
 
-	return !empty || tcp_sk(recv_sock->sk)->accept_queue;
+	return !empty || tcp_sk(sock->sk)->accept_queue;
 }
 
 static int net_receive_thread(void *data)
 {
-	int status;
-	DECLARE_WAITQUEUE(main_wait, current);
+	struct socket *sock = data;
 
-	util_daemonize ("netrecv", strlen("netrecv"), 1);
-	net_recv_task = current;
+	netprintk0("net thread running...\n");
 
-	status = net_init_tcp_recv_sock();
-       	if (status >= 0 && recv_sock) {
-		add_wait_queue_exclusive(recv_sock->sk->sk_sleep, &main_wait);
-		while (1) {
-			net_try_accept();
-			net_receive();
+       	while(!kthread_should_stop()) {
+		net_try_accept(sock);
+		net_receive();
 
-			wait_event_interruptible(*recv_sock->sk->sk_sleep,
-						 net_rx_should_wake());
-
-			if (signal_pending(current)) {
-				netprintk0("net recv thread got signal!\n");
-				break;
-			}
-		}
-		remove_wait_queue(recv_sock->sk->sk_sleep, &main_wait);
-	} else {
-		netprintk0("failed to initialize net_thread!\n");
+		wait_event_interruptible(*sock->sk->sk_sleep,
+					 net_rx_should_wake(sock) ||
+					 kthread_should_stop());
 	}
 
-	/* Flush all scheduled tasks */
-	flush_scheduled_work();
-	net_release_tcp_sock();
-	net_recv_task = NULL;
-	complete (&net_recv_complete);
+	netprintk("net thread exiting\n");
+	sock_release(sock);
 	return 0;
 }
 
@@ -816,8 +811,8 @@
 	if (list_empty(&net->active_item))
 		list_add_tail(&net->active_item, &net_active_list);
 
-	if (recv_sock != NULL)
-		wake_up(recv_sock->sk->sk_sleep);
+	if (net_recv_task)
+		wake_up_process(net_recv_task);
 }
 
 /* teardown can race with these guys and stop them in their read lock.. 
@@ -1451,30 +1446,31 @@
 	return ret;
 }
 
-static void net_try_accept(void)
+static void net_try_accept(struct socket *sock)
 {
 	int error, slen;
 	struct sockaddr_in sin;
-	struct socket *sock = NULL;
+	struct socket *new_sock = NULL;
 	struct inode *inode = NULL;
 	nm_node_inode_private *priv;
 
-	BUG_ON(recv_sock == NULL);
-	error = sock_create_lite(recv_sock->sk->sk_family,
-				 recv_sock->sk->sk_type,
-				 recv_sock->sk->sk_protocol,
-				 &sock);
+	BUG_ON(sock == NULL);
+	error = sock_create_lite(sock->sk->sk_family,
+				 sock->sk->sk_type,
+				 sock->sk->sk_protocol,
+				 &new_sock);
 	if (error)
 		goto out;
 
-	sock->type = recv_sock->type;
-	sock->ops = recv_sock->ops;
-	error = recv_sock->ops->accept(recv_sock, sock, O_NONBLOCK);
+	new_sock->type = sock->type;
+	new_sock->ops = sock->ops;
+	error = sock->ops->accept(sock, new_sock, O_NONBLOCK);
 	if (error < 0)
 		goto out;
 
 	slen = sizeof(sin);
-	error = sock->ops->getname(sock, (struct sockaddr *) &sin, &slen, 1);
+	error = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
+				       &slen, 1);
 	if (error < 0)
 		goto out;
 	
@@ -1484,7 +1480,7 @@
 	inode = nm_get_node_by_ip(sin.sin_addr.s_addr);
 	if (inode == NULL) {
 		netprintk0("connect from unknown host...\n");
-		net_send_error(sock, NET_UNKNOWN_HOST);
+		net_send_error(new_sock, NET_UNKNOWN_HOST);
 		goto out;
 	}
 
@@ -1498,15 +1494,15 @@
 			  "%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr),
 			  ntohs(sin.sin_port));
 
-	error = net_attach_sock(&priv->net, sock);
+	error = net_attach_sock(&priv->net, new_sock);
 	if (error == -EEXIST)
-		net_send_error(sock, NET_ALREADY_CONNECTED);
+		net_send_error(new_sock, NET_ALREADY_CONNECTED);
 
 out:
 	if (error) {
-		if (sock) {
-			net_sock_drain(sock);
-			sock_release(sock);
+		if (new_sock) {
+			net_sock_drain(new_sock);
+			sock_release(new_sock);
 		}
 		if (inode)
 			iput(inode);
@@ -1514,14 +1510,15 @@
 	return;
 }
 
-static int net_init_tcp_recv_sock(void)
+static struct socket *net_init_tcp_recv_sock(void)
 {
 	struct sockaddr_in sin;
+	struct socket *sock;
 	int error;
 
 	error = sock_create(net_ip_version_to_family(ip_version),
 			     SOCK_STREAM, IPPROTO_TCP,
-			     &recv_sock);
+			     &sock);
 	if (error < 0) {
 		netprintk("unable to create socket, error=%d\n", error);
 		goto bail;
@@ -1532,8 +1529,8 @@
 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
 	sin.sin_port = ip_port;
 
-	error = recv_sock->ops->bind(recv_sock, (struct sockaddr *)&sin,
-				      sizeof(sin));
+	error = sock->ops->bind(sock, (struct sockaddr *)&sin,
+				sizeof(sin));
 	if (error < 0) {
 		netprintk ("unable to bind socket to port %d, error=%d\n", 
 			ntohs(ip_port), error);
@@ -1541,24 +1538,18 @@
 	}
 
 	/* !!! dunno about these... */
-	recv_sock->sk->sk_reuse = 1;
-	error = recv_sock->ops->listen(recv_sock, 64);
+	sock->sk->sk_reuse = 1;
+	error = sock->ops->listen(sock, 64);
 
 bail:
-	if (error && recv_sock) {
-		sock_release(recv_sock);
-		recv_sock = NULL;
+	if (error) {
+	       if (sock)
+			sock_release(sock);
+	       sock = ERR_PTR(error);
 	}
-	return error;
-}				/* net_init_tcp_recv_sock */
 
-
-static void net_release_tcp_sock(void)
-{
-	if (recv_sock) {
-		sock_release(recv_sock);
-		recv_sock = NULL;
-	}
+	BUG_ON(sock == NULL);
+	return sock;
 }
 
 MODULE_LICENSE("GPL");

Modified: trunk/fs/ocfs2/cluster/util.c
===================================================================
--- trunk/fs/ocfs2/cluster/util.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/cluster/util.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -67,48 +67,6 @@
 #endif
 }
 
-/*
- * util_daemonize() 
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
-	sigset_t tmpsig;
-
-	daemonize (name);
-
-	if (shutdown_sigs) {
-		/* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
-		sigemptyset(&tmpsig);
-		sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
-		sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
-	}
-
-	return;
-}				/* util_daemonize */
-#else
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
-	daemonize ();
-	reparent_to_init ();
-
-	if (len > 0) {
-		if (len > 15)
-			BUG();
-		strncpy (current->comm, name, len);
-		current->comm[len] = '\0';
-	}
-
-	if (shutdown_sigs)
-		util_block_sigs(NULL, SHUTDOWN_SIGS);
-	else
-		util_block_sigs(NULL, 0);
-	return;
-}				/* util_daemonize */
-#endif
-
 /* prefetch has been declared to allow to build in debug mode */
 #ifdef DEBUG
 #ifndef ARCH_HAS_PREFETCH

Modified: trunk/fs/ocfs2/cluster/util.h
===================================================================
--- trunk/fs/ocfs2/cluster/util.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/cluster/util.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -40,7 +40,6 @@
 } util_timeout;
 
 void util_clear_timeout(util_timeout *to);
-void util_daemonize(char *name, int len, int shutdown_sigs);
 void util_init_timeout(util_timeout *to);
 void util_set_timeout(util_timeout *to, __u32 timeout);
 void util_show_stack(unsigned long *esp);
@@ -75,25 +74,4 @@
 void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
 int util_resize_rarray(util_rarray *arr, int newelem);
 
-#ifdef __KERNEL__
-typedef struct _util_thread_info
-{
-	wait_queue_head_t thread_wq;
-	atomic_t woken;
-	struct task_struct *task;
-	struct completion complete;
-	int pid;
-} util_thread_info;
-
-
-static inline void util_thread_info_init(util_thread_info *info)
-{
-	init_waitqueue_head(&info->thread_wq);
-	atomic_set(&info->woken, 0);
-	info->task = NULL;
-	info->pid = -1;
-	init_completion(&info->complete);
-}
-#endif /* __KERNEL__ */
-
 #endif /* CLUSTER_UTIL_H */

Modified: trunk/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmod.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlm/dlmmod.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -579,8 +579,7 @@
 
 	/* if the network code had any unregister calls, they would be here. */
 
-	if (dlm->thread.task)
-		dlm_complete_thread(dlm);
+	dlm_complete_thread(dlm);
 
 	/* We've left the domain. Now we can take ourselves out of the
 	 * list and allow the kref stuff to help us free the
@@ -650,8 +649,8 @@
 	INIT_LIST_HEAD(&dlm->dirty_list);
 	INIT_LIST_HEAD(&dlm->reco.resources);
 	INIT_LIST_HEAD(&dlm->reco.received);
-	util_thread_info_init(&dlm->thread);
-	util_thread_info_init(&dlm->reco.thread);
+	dlm->dlm_thread_task = NULL;
+	init_waitqueue_head(&dlm->dlm_thread_wq);
 	init_rwsem(&dlm->recovery_sem);
 
 	/* this eats the reference we got above. */

Modified: trunk/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmod.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlm/dlmmod.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -181,7 +181,6 @@
 	u8  dead_node;
 	u8  sending_node;
 	u32 next_seq;
-	util_thread_info thread;
 	unsigned long node_map[BITS_TO_LONGS(NM_MAX_NODES)];
 } dlm_recovery_ctxt;
 
@@ -199,7 +198,6 @@
 	spinlock_t spinlock;
 	struct rw_semaphore recovery_sem;
 	char *name;
-	util_thread_info thread;
 	struct inode *group;
 	u32 key;
 	u8  group_index;
@@ -212,6 +210,8 @@
 	unsigned int num_joins;
 	struct hb_callback_func dlm_hb_up;
 	struct hb_callback_func dlm_hb_down;
+	struct task_struct *dlm_thread_task;
+	wait_queue_head_t dlm_thread_wq;
 };
 
 #define DLM_LOCK_RES_UNINITED             0x00000001

Modified: trunk/fs/ocfs2/dlm/dlmthread.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmthread.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlm/dlmthread.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -39,6 +39,7 @@
 #include <linux/socket.h>
 #include <linux/inet.h>
 #include <linux/timer.h>
+#include <linux/kthread.h>
 
 #include "util.h"
 
@@ -56,6 +57,7 @@
 extern u8 dlm_global_index;
 
 static int dlm_thread(void *data);
+struct task_struct *dlm_thread_task;
 
 #define dlm_lock_is_remote(dlm, lock)     ((lock)->ml.node != (dlm)->group_index)
 
@@ -245,50 +247,54 @@
 		spin_unlock(&dlm->spinlock);
 	}
 
-	/* wake the dlm thread */
-	atomic_set(&dlm->thread.woken, 1);
-	wake_up(&dlm->thread.thread_wq);
+	wake_up(&dlm->dlm_thread_wq);
 }
 
 /* Launch the NM thread for the mounted volume */
 int dlm_launch_thread(dlm_ctxt *dlm)
 {
 	dlmprintk0("starting dlm thread...\n");
-	dlm->thread.pid = kernel_thread (dlm_thread, dlm, 
-					 CLONE_FS | CLONE_FILES | 
-					 CLONE_SIGHAND);
-	if (dlm->thread.pid < 0) {
-		dlmprintk("unable to launch dlm thread, error=%d", 
-			  dlm->thread.pid);
+
+	dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
+	if (IS_ERR(dlm->dlm_thread_task)) {
+		dlm->dlm_thread_task = NULL;
+		dlmprintk("unable to launch dlm thread, error=%ld", 
+			  PTR_ERR(dlm->dlm_thread_task));
 		return -EINVAL;
 	}
-	dlmprintk("dlm thread running for %s...\n", dlm->name);
+
 	return 0;
 }
 
 void dlm_complete_thread(dlm_ctxt *dlm)
 {
-	dlmprintk0 ("waiting for dlm thread to exit....");
-	send_sig (SIGINT, dlm->thread.task, 0);
-	wait_for_completion (&dlm->thread.complete);
-	dlmprintk0 ("dlm thread exited\n");
-	dlm->thread.task = NULL;
+	if (dlm->dlm_thread_task) {
+		dlmprintk0("waiting for dlm thread to exit\n");
+		kthread_stop(dlm->dlm_thread_task);
+		dlm->dlm_thread_task = NULL;
+	}
 }
 
+static int dlm_dirty_list_empty(dlm_ctxt *dlm)
+{
+	int empty;
 
+	spin_lock(&dlm->spinlock);
+	empty = list_empty(&dlm->dirty_list);
+	spin_unlock(&dlm->spinlock);
+	
+	return empty;
+}
 
-
 static int dlm_thread(void *data)
 {
 	struct list_head *iter, *tmpiter;
 	dlm_lock_resource *res;
 	dlm_ctxt *dlm = data;
 
-	util_daemonize ("dlm_thread", strlen("dlm_thread"), 1);
-	dlm->thread.task = current;
+	dlmprintk("dlm thread running for %s...\n", dlm->name);
 
-	while (1) {
-		atomic_set(&dlm->thread.woken, 0);
+	while (!kthread_should_stop()) {
 
 		down_read(&dlm->recovery_sem);
 		spin_lock(&dlm->spinlock);
@@ -307,17 +313,11 @@
 		spin_unlock(&dlm->spinlock);
 		up_read(&dlm->recovery_sem);
 
-		wait_event_interruptible(dlm->thread.thread_wq,
-					 atomic_read(&dlm->thread.woken));
-
-		if (signal_pending(current)) {
-			dlmprintk("DLM thread got signal while waiting\n");
-			break;
-		}
+		wait_event_interruptible(dlm->dlm_thread_wq,
+					 !dlm_dirty_list_empty(dlm) ||
+					 kthread_should_stop());
 	}
 
-	flush_scheduled_work();
-	complete (&dlm->thread.complete);
-	dlmprintk0("quitting DLM thread!!!!!!\n");
+	dlmprintk0("quitting DLM thread\n");
 	return 0;
 }

Modified: trunk/fs/ocfs2/dlm/util.c
===================================================================
--- trunk/fs/ocfs2/dlm/util.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlm/util.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -65,48 +65,6 @@
 #endif
 }
 
-/*
- * util_daemonize() 
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
-	sigset_t tmpsig;
-
-	daemonize (name);
-
-	if (shutdown_sigs) {
-		/* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
-		sigemptyset(&tmpsig);
-		sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
-		sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
-	}
-
-	return;
-}				/* util_daemonize */
-#else
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
-	daemonize ();
-	reparent_to_init ();
-
-	if (len > 0) {
-		if (len > 15)
-			BUG();
-		strncpy (current->comm, name, len);
-		current->comm[len] = '\0';
-	}
-
-	if (shutdown_sigs)
-		util_block_sigs(NULL, SHUTDOWN_SIGS);
-	else
-		util_block_sigs(NULL, 0);
-	return;
-}				/* util_daemonize */
-#endif
-
 /* prefetch has been declared to allow to build in debug mode */
 #ifdef DEBUG
 #ifndef ARCH_HAS_PREFETCH

Modified: trunk/fs/ocfs2/dlm/util.h
===================================================================
--- trunk/fs/ocfs2/dlm/util.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlm/util.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -43,7 +43,6 @@
 } util_timeout;
 
 void util_clear_timeout(util_timeout *to);
-void util_daemonize(char *name, int len, int shutdown_sigs);
 void util_init_timeout(util_timeout *to);
 void util_set_timeout(util_timeout *to, __u32 timeout);
 void util_show_stack(unsigned long *esp);
@@ -78,25 +77,4 @@
 void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
 int util_resize_rarray(util_rarray *arr, int newelem);
 
-#ifdef __KERNEL__
-typedef struct _util_thread_info
-{
-	wait_queue_head_t thread_wq;
-	atomic_t woken;
-	struct task_struct *task;
-	struct completion complete;
-	int pid;
-} util_thread_info;
-
-
-static inline void util_thread_info_init(util_thread_info *info)
-{
-	init_waitqueue_head(&info->thread_wq);
-	atomic_set(&info->woken, 0);
-	info->task = NULL;
-	info->pid = -1;
-	init_completion(&info->complete);
-}
-#endif /* __KERNEL__ */
-
 #endif /* CLUSTER_UTIL_H */

Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/dlmglue.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/crc32.h>
+#include <linux/kthread.h>
 
 #include <cluster/util.h>
 #include <cluster/clcommon.h>
@@ -1598,22 +1599,21 @@
 
 int ocfs2_dlm_init(ocfs_super *osb)
 {
-	int status, pid;
+	int status;
 	u32 dlm_key;
 	dlm_ctxt *dlm = NULL;
 
 	LOG_ENTRY();
 
 	/* launch vote thread */
-	init_completion (&osb->vote_event_init);
-	pid = kernel_thread(ocfs2_vote_thread, osb,
-			    CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-	if (pid < 0) {
-		status = pid;
+	osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote-%d",
+				     osb->osb_id);
+	if (IS_ERR(osb->vote_task)) {
+		status = PTR_ERR(osb->vote_task);
+		osb->vote_task = NULL;
 		LOG_ERROR_STATUS(status);
 		goto bail;
 	}
-	wait_for_completion(&osb->vote_event_init);
 
 	/* used by the dlm code to make message headers unique, each
 	 * node in this domain must agree on this. */
@@ -1641,24 +1641,14 @@
 
 void ocfs2_dlm_shutdown(ocfs_super *osb)
 {
-	int wait_on_vote_task = 0;
-
 	LOG_ENTRY();
 	ocfs2_drop_super_lock(osb);
 
-	/* needs to be able to deal with the dlm being in many
-	 * different states. */
-	spin_lock(&osb->vote_task_lock);
 	if (osb->vote_task) {
-		osb->vote_exit = 1;
-		ocfs2_kick_vote_thread(osb);
-		wait_on_vote_task = 1;
+		kthread_stop(osb->vote_task);
+		osb->vote_task = NULL;
 	}
-	spin_unlock(&osb->vote_task_lock);
 
-	if (wait_on_vote_task)
-		wait_for_completion(&osb->vote_event_complete);
-
 	ocfs2_lock_res_free(&osb->super_lockres);
 	dlm_unregister_domain(osb->dlm);
 	LOG_EXIT();

Modified: trunk/fs/ocfs2/journal.c
===================================================================
--- trunk/fs/ocfs2/journal.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/journal.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/kthread.h>
 
 #include "ocfs_log.h"
 #include "ocfs.h"
@@ -60,7 +61,6 @@
 static int __ocfs_recovery_thread(void *arg);
 static int ocfs_commit_cache (ocfs_super * osb);
 static int ocfs_wait_on_mount(ocfs_super *osb);
-static int ocfs2_commit_thread_sleep(ocfs_super *osb);
 static void ocfs_handle_cleanup_locks(ocfs_journal *journal, 
 				      ocfs_journal_handle *handle,
 				      int set_id);
@@ -778,14 +778,11 @@
 	/* The OCFS_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
 	 * drop the trans_lock (which we want to hold until we
 	 * completely destroy the journal. */
-	if (osb->commit && osb->commit->c_task) {
+	if (osb->commit_task) {
 		/* Wait for the commit thread */
 		LOG_TRACE_STR ("Waiting for ocfs2commit to exit....");
-		atomic_set (&osb->needs_checkpoint, 1);
-		wake_up (&osb->checkpoint_event);
-		wait_for_completion(&osb->commit->c_complete);
-		osb->commit->c_task = NULL;
-		kfree(osb->commit);
+		kthread_stop(osb->commit_task);
+		osb->commit_task = NULL;
 	}
 
 	OCFS_ASSERT(atomic_read(&(osb->journal->num_trans)) == 0);
@@ -818,7 +815,6 @@
 {
 	int status = 0;
 	int olderr = 0;
-	int child_pid;
 	ocfs_super *osb;
 
 	LOG_ENTRY();
@@ -848,22 +844,15 @@
 	}
 
 	/* Launch the commit thread */
-	osb->commit = kmalloc(sizeof(ocfs_commit_task), GFP_KERNEL);
-	if (osb->commit == NULL) {
-		LOG_ERROR_STATUS(status = -ENOMEM);
+	osb->commit_task = kthread_run(ocfs_commit_thread, osb, "ocfs2cmt-%d",
+				       osb->osb_id);
+	if (IS_ERR(osb->commit_task)) {
+		status = PTR_ERR(osb->commit_task);
+		osb->commit_task = NULL;
+		LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, "
+				"error=%d", status);
 		goto done;
-	}
-	memset(osb->commit, 0, sizeof(ocfs_commit_task));
-	child_pid = kernel_thread (ocfs_commit_thread, osb,
-				   CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-	if (child_pid < 0) {
-		LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
-				child_pid);
-		status = child_pid;
-		goto done;
-	} else {
-		init_completion (&osb->commit->c_complete);
-	}
+	} 
 
 done:
 	LOG_EXIT_STATUS(status);
@@ -969,13 +958,9 @@
 	ocfs_super *osb = arg;
 	int status = 0;
 	int node_num;
-	char proc[16];
 
 	LOG_ENTRY();
 
-	sprintf (proc, "ocfs2rec-%d", osb->osb_id);
-	ocfs_daemonize (proc, strlen(proc), 0);
-
 	status = ocfs_wait_on_mount(osb);
 	if (status < 0) {
 		if (status == -EBUSY)
@@ -1020,12 +1005,17 @@
 		goto restart;
 	}
 
-	osb->recovery_launched = 0;
+	osb->recovery_thread_task = NULL;
+	mb(); /* sync with ocfs2_recovery_thread_running */
 	wake_up(&osb->recovery_event);
 
 	up(&osb->recovery_lock);
 
 	LOG_EXIT_STATUS(status);
+	/* no one is callint kthread_stop() for us so the kthread() api
+	 * requires that we call do_exit().  And it isn't exported, but
+	 * complete_and_exit() seems to be a minimal wrapper around it. */
+	complete_and_exit(NULL, status);
 	return status;
 }
 
@@ -1035,19 +1025,25 @@
 		       node_num, osb->node_num);
 
 	down(&osb->recovery_lock);
-	if (!osb->disable_recovery) {
-		/* People waiting on recovery will wait on
-		 * the recovery map to empty. */
-		ocfs_recovery_map_set(osb, node_num);
+	if (osb->disable_recovery)
+		goto out;
+	/* People waiting on recovery will wait on
+	 * the recovery map to empty. */
+	ocfs_recovery_map_set(osb, node_num);
 
-		LOG_TRACE_STR("starting recovery thread...");
+	LOG_TRACE_STR("starting recovery thread...");
 
-		if (!osb->recovery_launched) {
-			kernel_thread(__ocfs_recovery_thread, osb,
-				      CLONE_VM | CLONE_FS | CLONE_FILES);
-			osb->recovery_launched = 1;
-		}
+	if (osb->recovery_thread_task)
+		goto out;
+
+	osb->recovery_thread_task =  kthread_run(__ocfs_recovery_thread, osb,
+						 "ocfs2rec-%d", osb->osb_id);
+	if (IS_ERR(osb->recovery_thread_task)) {
+		LOG_ERROR_STATUS((int)PTR_ERR(osb->recovery_thread_task));
+		osb->recovery_thread_task = NULL;
 	}
+
+out:
 	up(&osb->recovery_lock);
 	wake_up(&osb->recovery_event);
 
@@ -1455,104 +1451,42 @@
 	goto retry;
 }
 
-static int ocfs2_commit_thread_sleep(ocfs_super *osb)
-{
-	int status;
-	signed long timeout = OCFS_CHECKPOINT_INTERVAL;
-	DECLARE_WAITQUEUE(wait, current);
-
-	if (atomic_read(&osb->needs_checkpoint))
-		return 0;
-
-	status = 0;
-	add_wait_queue(&osb->checkpoint_event, &wait);
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (atomic_read(&osb->needs_checkpoint))
-			break;
-
-		if (!signal_pending(current)) {
-			timeout = schedule_timeout(timeout);
-			if (!timeout) {
-				status = -ETIMEDOUT;
-				break;
-			}
-			continue;
-		}
-		status = -EINTR;
-		break;
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&osb->checkpoint_event, &wait);
-
-	return status;
-}
-
 int ocfs_commit_thread(void *arg)
 {
-	int status = 0, misses = 0;
+	int status;
 	ocfs_super *osb = arg;
-	ocfs_commit_task *commit = osb->commit;
-	char name[16];
 	ocfs_journal *journal = osb->journal;
-	siginfo_t info;
 
-	sprintf (name, "ocfs2cmt-%d", osb->osb_id);
-	ocfs_daemonize (name, strlen(name), 0);
+	/* we can trust num_trans here because _should_stop() is only set in
+	 * shutdown and nobody other than ourselves should be able to start
+	 * transactions.  committing on shutdown might take a few iterations
+	 * as final transactions put deleted inodes on the list */
+	while (!(kthread_should_stop() && 
+		 atomic_read(&journal->num_trans) == 0)) {
 
-	commit->c_task = current;
+		status = wait_event_timeout(osb->checkpoint_event,
+					    atomic_read(&journal->num_trans) ||
+						kthread_should_stop(),
+					    OCFS_CHECKPOINT_INTERVAL);
+		if (status == 0)
+			LOG_TRACE_STR("timed out");
+		else if (status == -ERESTARTSYS)
+			LOG_TRACE_STR("signal pending\n");
+		else
+			LOG_TRACE_STR("woken");
 
-	misses = 0;
-	while (1) {
-		status = ocfs2_commit_thread_sleep(osb);
-		atomic_set (&osb->needs_checkpoint, 0);
-
-		switch (status) {
-			case -ETIMEDOUT:
-				LOG_TRACE_STR("timed out");
-				break;
-			case -EINTR:
-				LOG_ERROR_STR("Commit thread got a signal!");
-				/* ignore the actual signal */
-				if (signal_pending(current)) {
-					dequeue_signal_lock(current, 
-							    &current->blocked, 
-							    &info);
-				}
-				break;
-			case 0:
-				LOG_TRACE_STR("woken\n");
-				break;
-			default:
-				LOG_ERROR_STR("invalid status!\n");
-				break;
-		}
-
-skip_sleep:
 		status = ocfs_commit_cache(osb);
 		if (status < 0)
 			LOG_ERROR_STATUS(status);
 
-		/* journal shutdown has asked me to sync up and then
-		 * exit. We might still have transactions pending as
-		 * our last commit might've put deleted inodes on the
-		 * list so we loop back around. */
-		if (journal->state == OCFS_JOURNAL_IN_SHUTDOWN) {
-			/* we can trust num_trans here because we're
-			 * in shutdown and nobody other than ourselves
-			 * should be able to start more. */
-			if (atomic_read(&journal->num_trans) == 0)
-				break;
 #ifdef VERBOSE_COMMIT_THREAD
+		if (kthread_should_stop() && atomic_read(&journal->num_trans)){
 			printk("(%u) commit_thread: %u transactions pending "
-			       "on shutdown\n", 
-			       current->pid, atomic_read(&journal->num_trans));
-#endif
-			goto skip_sleep;
+			       "on shutdown\n", current->pid,
+			       atomic_read(&journal->num_trans));
 		}
+#endif
 	}
 
-	complete (&(commit->c_complete));
 	return 0;
 }

Modified: trunk/fs/ocfs2/ocfs.h
===================================================================
--- trunk/fs/ocfs2/ocfs.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/ocfs.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -287,12 +287,6 @@
 }
 ocfs_vol_state;
 
-typedef struct _ocfs_commit_task
-{
-	struct completion c_complete;
-	struct task_struct *c_task;
-} ocfs_commit_task;
-
 typedef struct _ocfs_alloc_stats
 {
 	atomic_t moves;
@@ -317,7 +311,7 @@
 {
 	struct list_head osb_next;	/* list of ocfs_super(s) */
 	u32 osb_id;		/* id used by the proc interface */
-	ocfs_commit_task *commit;
+	struct task_struct *commit_task;
 	struct super_block *sb;
 	struct inode *root_inode;
 	struct inode *sys_root_inode;
@@ -360,7 +354,7 @@
 
 	atomic_t vol_state;
 	struct semaphore recovery_lock;
-	int recovery_launched;
+	struct task_struct *recovery_thread_task;
 	int disable_recovery;
 	wait_queue_head_t checkpoint_event;
 	atomic_t needs_checkpoint;
@@ -389,8 +383,8 @@
 	spinlock_t vote_task_lock;
 	struct task_struct *vote_task;
 	wait_queue_head_t vote_event;
-	atomic_t wake_vote_task;
-	int vote_exit;
+	unsigned long vote_wake_sequence;
+	unsigned long vote_work_sequence;
 
 	struct list_head blocked_lock_list;
 	unsigned long blocked_lock_count;
@@ -398,9 +392,6 @@
 	struct list_head vote_list;
 	int vote_count;
 
-	struct completion vote_event_complete;
-	struct completion vote_event_init;
-
 	u32 net_key;
 	spinlock_t net_response_lock;
 	unsigned int net_response_ids;

Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/super.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -1010,6 +1010,15 @@
 }				/* ocfs_mount_volume */
 
 
+/* we can't grab the goofy sem lock from inside wait_event, so we use
+ * memory barriers to make sure that we'll see the null task before
+ * being woken up */
+static int ocfs2_recovery_thread_running(ocfs_super *osb)
+{
+	mb();
+	return osb->recovery_thread_task != NULL;
+}
+
 /*
  * ocfs_dismount_volume()
  *
@@ -1031,13 +1040,8 @@
 	 * running ones to exit. Do this before setting the vol_state. */
 	down(&osb->recovery_lock);
 	osb->disable_recovery = 1;
-	while (osb->recovery_launched) {
-		up(&osb->recovery_lock);
-		LOG_TRACE_STR("Waiting on a recovery thread to complete.");
-		schedule();
-		down(&osb->recovery_lock);
-	}
 	up(&osb->recovery_lock);
+	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
 
 	ocfs_journal_shutdown(osb);
 
@@ -1135,9 +1139,10 @@
 	}
 
 	init_waitqueue_head(&osb->recovery_event);
-	atomic_set(&osb->wake_vote_task, 0);
 	spin_lock_init(&osb->vote_task_lock);
 	init_waitqueue_head(&osb->vote_event);
+	osb->vote_work_sequence = 0;
+	osb->vote_wake_sequence = 0;
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
 	INIT_LIST_HEAD(&osb->vote_list);
@@ -1171,7 +1176,7 @@
 	init_MUTEX (&(osb->recovery_lock));
 
 	osb->disable_recovery = 0;
-	osb->recovery_launched = 0;
+	osb->recovery_thread_task = NULL;
 
 	init_waitqueue_head (&osb->checkpoint_event);
 	atomic_set (&osb->needs_checkpoint, 0);

Modified: trunk/fs/ocfs2/util.c
===================================================================
--- trunk/fs/ocfs2/util.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/util.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -69,45 +69,6 @@
 #endif
 }
 
-/*
- * ocfs_daemonize() 
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void ocfs_daemonize (char *name, int len, int shutdown_sigs)
-{
-	sigset_t tmpsig;
-
-	daemonize (name);
-
-	if (shutdown_sigs) {
-		/* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
-		sigemptyset(&tmpsig);
-		sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
-		sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
-	}
-}				/* ocfs_daemonize */
-#else
-void ocfs_daemonize (char *name, int len, int shutdown_sigs)
-{
-	daemonize ();
-	reparent_to_init ();
-
-	if (len > 0) {
-		if (len > 15)
-			BUG();
-		strncpy (current->comm, name, len);
-		current->comm[len] = '\0';
-	}
-
-	if (shutdown_sigs)
-		ocfs_block_sigs(NULL, SHUTDOWN_SIGS);
-	else
-		ocfs_block_sigs(NULL, 0);
-}				/* ocfs_daemonize */
-#endif
-
 /* prefetch has been declared to allow to build in debug mode */
 #ifdef DEBUG
 #ifndef ARCH_HAS_PREFETCH

Modified: trunk/fs/ocfs2/util.h
===================================================================
--- trunk/fs/ocfs2/util.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/util.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -26,7 +26,6 @@
 #ifndef OCFS2_UTIL_H
 #define OCFS2_UTIL_H
 
-void ocfs_daemonize(char *name, int len, int shutdown_sigs);
 void ocfs_show_stack(unsigned long *esp);
 void ocfs_show_trace(unsigned long *stack);
 void ocfs_block_sigs(sigset_t *oldsigs, unsigned long mask);

Modified: trunk/fs/ocfs2/vote.c
===================================================================
--- trunk/fs/ocfs2/vote.c	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/vote.c	2005-03-04 20:26:31 UTC (rev 1938)
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
+#include <linux/kthread.h>
 
 #include <cluster/util.h>
 #include <cluster/clcommon.h>
@@ -327,6 +328,10 @@
 	LOG_ENTRY();
 
 	spin_lock(&osb->vote_task_lock);
+	/* grab this early so we know to try again if a state change and
+	 * wake happens part-way through our work  */
+	osb->vote_work_sequence = osb->vote_wake_sequence;
+
 	processed = osb->blocked_lock_count;
 	while (processed) {
 		OCFS_ASSERT(!list_empty(&osb->blocked_lock_list));
@@ -363,55 +368,52 @@
 	LOG_EXIT();
 }
 
-static inline int ocfs2_vote_thread_has_work(ocfs_super *osb)
+
+static int ocfs2_vote_thread_lists_empty(ocfs_super *osb)
 {
-	if (list_empty(&osb->blocked_lock_list) &&
-	    list_empty(&osb->vote_list))
-		return 0;
+	int empty = 0;
 
-	return 1;
+	spin_lock(&osb->vote_task_lock);
+       if (list_empty(&osb->blocked_lock_list) &&
+	   list_empty(&osb->vote_list))
+		empty = 1;
+
+	spin_unlock(&osb->vote_task_lock);
+	return empty;
 }
 
+static int ocfs2_vote_thread_should_wake(ocfs_super *osb)
+{
+	int should_wake = 0;
+
+	spin_lock(&osb->vote_task_lock);
+	if (osb->vote_work_sequence != osb->vote_wake_sequence)
+		should_wake = 1;
+	spin_unlock(&osb->vote_task_lock);
+
+	return should_wake;
+}
+
 int ocfs2_vote_thread(void *arg)
 {
 	int status = 0;
 	ocfs_super *osb = arg;
-	char proc[16];
 
-	sprintf (proc, "ocfs2vote-%d", osb->osb_id);
-	ocfs_daemonize (proc, strlen(proc), 0);
+	/* only quit once we've been asked to stop and there is no more
+	 * work available */
+	while (!(kthread_should_stop() &&
+		 ocfs2_vote_thread_lists_empty(osb))) {
 
-	spin_lock(&osb->vote_task_lock);
-	osb->vote_task = current;
-	init_completion (&osb->vote_event_complete);
-
-	complete(&osb->vote_event_init);
-
-	while (1) {
-		if (osb->vote_exit) {
-			if (!ocfs2_vote_thread_has_work(osb))
-				break;
-			/* don't want to sleep if we're supposed to quit. */
-			atomic_set(&osb->wake_vote_task, 1);
-		}
-		spin_unlock(&osb->vote_task_lock);
-
 		wait_event_interruptible(osb->vote_event,
-					 atomic_read(&osb->wake_vote_task));
+					 ocfs2_vote_thread_should_wake(osb) ||
+					 kthread_should_stop());
 
-		atomic_set(&osb->wake_vote_task, 0);
-
 		LOG_TRACE_STR("vote_thread: awoken");
 
 		ocfs2_vote_thread_do_work(osb);
-		spin_lock(&osb->vote_task_lock);
 	}
 
 	osb->vote_task = NULL;
-	spin_unlock(&osb->vote_task_lock);
-
-	complete(&osb->vote_event_complete);
-
 	return status;
 }
 

Modified: trunk/fs/ocfs2/vote.h
===================================================================
--- trunk/fs/ocfs2/vote.h	2005-03-04 02:08:55 UTC (rev 1937)
+++ trunk/fs/ocfs2/vote.h	2005-03-04 20:26:31 UTC (rev 1938)
@@ -30,7 +30,11 @@
 int ocfs2_vote_thread(void *arg);
 static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
 {
-	atomic_set(&osb->wake_vote_task, 1);
+	spin_lock(&osb->vote_task_lock);
+	/* make sure the voting thread gets a swipe at whatever changes
+	 * the caller may have made to the voting state */
+	osb->vote_wake_sequence++;
+	spin_unlock(&osb->vote_task_lock);
 	wake_up(&osb->vote_event);
 }
 



More information about the Ocfs2-commits mailing list