[Ocfs2-commits] mfasheh commits r1976 - in trunk/fs/ocfs2: . cluster

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue Mar 15 16:03:08 CST 2005


Author: mfasheh
Signed-off-by: zab
Date: 2005-03-15 16:03:06 -0600 (Tue, 15 Mar 2005)
New Revision: 1976

Modified:
   trunk/fs/ocfs2/cluster/tcp.c
   trunk/fs/ocfs2/cluster/tcp.h
   trunk/fs/ocfs2/heartbeat.c
Log:
* Rework the tcp stuff so that all messages send a status return. This
  allows us to detect errors on receiving nodes and handle them
  appropriately at the sending side.

* use wait_event instead of wait_event_interruptible in
  net_send_message_iov. Once the message has been sent, the wait for its
  status must not be interruptible by a signal; otherwise things like
  dlm state on receiving nodes will get messed up.

* hook the tcp stuff into heartbeat so that it can throw an error at any
  processes waiting for a response from a node which just died.

* add a couple of debug prints which aided me in tracing paths through
  the code.

Signed-off-by: zab



Modified: trunk/fs/ocfs2/cluster/tcp.c
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.c	2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/cluster/tcp.c	2005-03-15 22:03:06 UTC (rev 1976)
@@ -169,6 +169,9 @@
 	spin_unlock(&net_status_lock);
 }
 
+static int net_register_hb_callbacks(void);
+static void net_unregister_hb_callbacks(void);
+
 /////////////////////
 static void net_shutdown(void);
 static int net_startup(void);
@@ -228,7 +231,10 @@
 	if (!de)
 		return -1;
 	de->proc_fops->ioctl = net_ioctl;
-	
+
+	if (net_register_hb_callbacks())
+		return -1;
+
 	netprintk0("Loaded net Driver module\n");
 	return 0;
 }				/* net_driver_entry */
@@ -347,6 +353,7 @@
 	}
 	up(&net_state_lock);
 	remove_proc_entry("cluster/net", NULL);
+	net_unregister_hb_callbacks();
 	netprintk0("Unloading net driver module\n");
 	return;
 }				/* net_driver_exit */
@@ -609,7 +616,8 @@
 			 size_t caller_iovlen, struct inode *inode,
 			 int *status)
 {
-	int ret, tmpret;
+	int ret;
+	int cleanup_wq = 0;
 	net_msg *msg = NULL;
 	net_status_ctxt nsc;
 	wait_queue_t sleep;
@@ -617,6 +625,8 @@
 	struct iovec *iov = NULL;
 	struct socket *sock = NULL;
 
+	BUG_ON(current == net_recv_task);
+
 	if (!inode || !inode->u.generic_ip) {
 		netprintk0("bad inode, cannot send message\n");
 		ret = -EINVAL;
@@ -641,6 +651,8 @@
 	if (ret)
 		goto out;
 
+	netprintk0("returned from net_sock_addref_or_connect, building msg\n");
+
 	/* build up our iovec */
 	iovlen = caller_iovlen + 1;
 	iov = kmalloc(sizeof(struct iovec) * iovlen, GFP_KERNEL);
@@ -660,6 +672,8 @@
 	msg->magic = NET_MSG_MAGIC;
 	msg->data_len = caller_bytes;
 	msg->msg_type = msg_type;
+	msg->sys_status = NET_ERR_NONE;
+	msg->status = 0;
 	msg->key = key;
 	msg->msg_num = net_next_msg_num();
 
@@ -667,56 +681,52 @@
 	iov[0].iov_base = msg;
 	memcpy(&iov[1], caller_iov, caller_iovlen * sizeof(struct iovec));
 
-	/* does the caller want to wait for a simple status? */
-	if (status) {
-		msg->status = 1;
+	/* Setup for status return wait */
+	init_waitqueue_head(&nsc.wq);
+	atomic_set(&nsc.woken, 0);
+	nsc.msg_num = msg->msg_num;
+	nsc.sys_status = NET_ERR_NONE;
+	nsc.status = 0;
+	/* XXX: Should be using group index here. */
+	nsc.target_node = nm_get_node_global_index(inode);
 
-		init_waitqueue_head(&nsc.wq);
-		atomic_set(&nsc.woken, 0);
-		nsc.msg_num = msg->msg_num;
-		nsc.status = 0;
+	init_waitqueue_entry(&sleep, current);
+	add_wait_queue(&nsc.wq, &sleep);
+	cleanup_wq = 1;
 
-		init_waitqueue_entry(&sleep, current);
-		add_wait_queue(&nsc.wq, &sleep);
+	spin_lock(&net_status_lock);
+	list_add_tail(&nsc.list, &net_status_list);
+	spin_unlock(&net_status_lock);
 
-		spin_lock(&net_status_lock);
-		list_add_tail(&nsc.list, &net_status_list);
-		spin_unlock(&net_status_lock);
-	}
-
-	/* finally, convert the message header to network byte-order and send */
+	/* finally, convert the message header to network byte-order
+	 * and send */
 	net_msg_to_net(msg);
 	ret = net_send_tcp_msg(sock, iov, iovlen,
 			       sizeof(net_msg) + caller_bytes);
 	net_msg_to_host(msg);  /* just swapping for printk, its unused now */
 	msgprintk(msg, "sending returned %d\n", ret);
-
-	if (status) {
-		if (ret >= 0) {
-			/* wait on other node's handler */
-			tmpret = wait_event_interruptible(nsc.wq,
-					(atomic_read(&nsc.woken) == 1));
-			if (tmpret==0) {
-				*status = nsc.status;
-				netprintk("status return requested, status is %d\n", *status);
-			} else {
-				ret = tmpret;
-				net_abort_status_return(&nsc);
-				netprintk0("net_abort_status_return called\n");
-				netprintk("status return requested, and error occurred while waiting=%d\n", ret);
-				*status = ret;
-			}
-		} else {
-			netprintk("status return requested, and error returned from net_send_tcp_msg=%d\n", ret);
-			/* return bad status right away */
-			*status = ret;
-		}
-		remove_wait_queue(&nsc.wq, &sleep);
-	} else if (ret < 0) {
-		netprintk("no status return requested, but error returned from net_send_tcp_msg=%d\n", ret);
+	if (ret < 0) {
+		net_abort_status_return(&nsc);
+		netprintk("error returned from net_send_tcp_msg=%d\n", ret);
+		goto out;
 	}
 
+	/* wait on other node's handler */
+	wait_event(nsc.wq, (atomic_read(&nsc.woken) == 1));
+
+	/* Note that we avoid overwriting the callers status return
+	 * variable if a system error was reported on the other
+	 * side. Callers beware. */
+	ret = net_sys_err_to_errno(nsc.sys_status);
+	if (status && !ret)
+		*status = nsc.status;
+
+	netprintk("woken, returning system status %d, user status %d",
+		  ret, nsc.status);
+
 out:
+	if (cleanup_wq)
+		remove_wait_queue(&nsc.wq, &sleep);
 	if (sock)
 		net_sock_decref(inode, ret);
 	if (iov)
@@ -741,6 +751,7 @@
  *       in nm, and to have registered the message handler
  *   - if status was requested, it will be returned to the caller
  *       already converted to host byteorder
+ *   - status will not be set on return code != 0
  */
 int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
 		     struct inode *inode, int *status)
@@ -753,15 +764,19 @@
 }
 EXPORT_SYMBOL(net_send_message);
 
-static int net_send_status_magic(struct socket *sock, net_msg *hdr, int err)
+static int net_send_status_magic(struct socket *sock, net_msg *hdr,
+				 enum net_system_error syserr, int err)
 {
 	struct iovec iov = {
 		.iov_base = hdr,
 		.iov_len = sizeof(net_msg),
 	};
-	       
+
+	BUG_ON(syserr >= NET_ERR_MAX);
+
 	/* leave other fields intact from the incoming message, msg_num
 	 * in particular */
+	hdr->sys_status = syserr;
 	hdr->status = err;
 	hdr->magic = NET_MSG_STATUS_MAGIC;  // twiddle the magic
 	hdr->data_len = 0;
@@ -1008,6 +1023,7 @@
 	list_for_each(iter, &net_status_list) {
 		nsc = list_entry(iter, net_status_ctxt, list);
 		if (nsc->msg_num == hdr->msg_num) {
+			nsc->sys_status = hdr->sys_status;
 			nsc->status = hdr->status;
 			atomic_set(&nsc->woken, 1);
 			list_del_init(&nsc->list);
@@ -1021,11 +1037,72 @@
 	msgprintk(hdr, "sent to nsc %p\n", nsc);
 }
 
+static void net_kill_node_messages(u8 node)
+{
+	unsigned int num_kills = 0;
+	net_status_ctxt *nsc = NULL;
+	struct list_head *iter;
+
+	spin_lock(&net_status_lock);
+	list_for_each(iter, &net_status_list) {
+		nsc = list_entry(iter, net_status_ctxt, list);
+		if (nsc->target_node == node) {
+			nsc->sys_status = NET_ERR_DIED;
+			atomic_set(&nsc->woken, 1);
+			list_del_init(&nsc->list);
+			wake_up(&nsc->wq);
+
+			num_kills++;
+		}
+	}
+	spin_unlock(&net_status_lock);
+
+	netprintk("node %u died, killed %d messages\n", node, num_kills);
+}
+
+static void net_hb_node_down_cb(struct inode *group,
+				struct inode *node,
+				int node_num,
+				void *data)
+{
+	net_kill_node_messages(node_num);
+}
+
+static struct hb_callback_func	*net_hb_down = NULL;
+#define NET_HB_NODE_DOWN_PRI     (0x1)
+
+static int net_register_hb_callbacks(void)
+{
+	net_hb_down = kmalloc(sizeof(*net_hb_down), GFP_KERNEL);
+	if (!net_hb_down)
+		return -ENOMEM;
+	memset(net_hb_down, 0, sizeof(*net_hb_down));
+
+	hb_setup_callback(net_hb_down, HB_NODE_DOWN_CB,
+			  net_hb_node_down_cb, NULL, NET_HB_NODE_DOWN_PRI);
+	return hb_register_callback(net_hb_down);
+}
+
+static void net_unregister_hb_callbacks(void)
+{
+	int status;
+
+	if (net_hb_down) {
+		status = hb_unregister_callback(net_hb_down);
+		if (status < 0)
+			printk("ocfs2_tcp: Status return %d unregistering "
+			       "heartbeat callback!\n", status);
+		kfree(net_hb_down);
+		net_hb_down = NULL;
+	}
+}
+
 /* this returns -errno if the header was unknown or too large, etc.
  * after this is called the buffer us reused for the next message */
 static int net_process_message(struct socket *sock, net_msg *hdr)
 {
-	int ret;
+	int ret, tmpret;
+	enum net_system_error syserr;
 	net_msg_handler *hnd = NULL;
 
 	netprintk("received message header... magic=%u type=%u key=%u\n", 
@@ -1054,39 +1131,33 @@
 	}
 
 	/* find a handler for it */
+	ret = 0;
 	hnd = net_lookup_handler(hdr->msg_type, hdr->key);
 	if (!hnd) {
-		ret = -EINVAL;
-		msgprintk0(hdr, "no handler for message.\n");
-		goto out;
+		syserr = NET_ERR_NO_HNDLR;
+		goto out_respond;
 	}
 
-	ret = 0;
+	syserr = NET_ERR_NONE;
+
 	spin_lock(&hnd->lock);
 	if (hdr->data_len > hnd->max_len)
-		ret = -EOVERFLOW;
+		syserr = NET_ERR_OVERFLOW;
 	spin_unlock(&hnd->lock);
-	if (ret) {
-		msgprintk(hdr, "advertised data_len > handlers max_len (%u)\n",
-			  hnd->max_len);
-		goto out;
-	}
 
+	if (syserr != NET_ERR_NONE)
+		goto out_respond;
+
 	net_num_dispatched++;
 	ret = (hnd->func)(hdr, sizeof(net_msg) + hdr->data_len, hnd->data);
-	
-	/* if node has requested status return, do it now */
-	if (hdr->status) {
-		int tmpret;
-		/* this destroys the hdr, so don't use it after this */
-		tmpret = net_send_status_magic(sock, hdr, ret);
-		hdr = NULL;
-		netprintk("sending status %d returned %d\n", ret, tmpret);
-		ret = 0;
-	} else if (ret < 0) {
-		msgprintk(hdr, "dispatch returned %d\n", ret);
-	}
 
+out_respond:
+	/* this destroys the hdr, so don't use it after this */
+	tmpret = net_send_status_magic(sock, hdr, syserr, ret);
+	hdr = NULL;
+	netprintk("sending status %d, syserr %d returned %d\n", ret, syserr,
+		  tmpret);
+
 out:
 	if (hnd)
 		net_put_handler(hnd);
@@ -1390,6 +1461,8 @@
 		sock = net->sock;
 		net->sock_refs++;
 	} else {
+		netprintk("Initiating the connect!\n");
+
 		if (!net->sock_pending) {
 			/* ok, we'll be initiating the connect */
 			net->sock_pending = 1;

Modified: trunk/fs/ocfs2/cluster/tcp.h
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.h	2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/cluster/tcp.h	2005-03-15 22:03:06 UTC (rev 1976)
@@ -52,6 +52,31 @@
 #define NET_DISP_THREAD_MS   5000   /* TODO */
 #define NET_RECV_THREAD_MS   5000   /* TODO */
 
+enum net_system_error {
+	NET_ERR_NONE = 0,
+	NET_ERR_NO_HNDLR,
+	NET_ERR_OVERFLOW,
+	NET_ERR_DIED,
+	NET_ERR_MAX
+};
+
+static int net_sys_err_translations[NET_ERR_MAX] =
+		{[NET_ERR_NONE] = 0,
+		 [NET_ERR_NO_HNDLR] = -ENOPROTOOPT,
+		 [NET_ERR_OVERFLOW]  = -EOVERFLOW,
+		 [NET_ERR_DIED] = -EHOSTDOWN,};
+
+static inline int net_sys_err_to_errno(enum net_system_error err)
+{
+	int trans;
+	BUG_ON(err >= NET_ERR_MAX);
+	trans = net_sys_err_translations[err];
+
+	/* Just in case we mess up the translation table above */
+	BUG_ON(err != NET_ERR_NONE && trans == 0);
+	return trans;
+}
+
 #define NET_MSG_MAGIC           ((u16)0xfa55)
 #define NET_MSG_STATUS_MAGIC    ((u16)0xfa56)
 typedef struct _net_msg
@@ -59,7 +84,9 @@
 	__u16 magic;
 	__u16 data_len;
 	__u16 msg_type;
-	__s16 status;
+	__u16 pad1;
+	__u32 sys_status;
+	__s32 status;
 	__u32 key;
 	__u32 msg_num;
 	__u8  buf[0];
@@ -70,7 +97,8 @@
 	m->magic = htons(m->magic);
 	m->data_len = htons(m->data_len);
 	m->msg_type = htons(m->msg_type);
-	m->status = htons(m->status);
+	m->sys_status = htonl(m->sys_status);
+	m->status = htonl(m->status);
 	m->key = htonl(m->key);
 	m->msg_num = htonl(m->msg_num);
 }
@@ -79,7 +107,8 @@
 	m->magic = ntohs(m->magic);
 	m->data_len = ntohs(m->data_len);
 	m->msg_type = ntohs(m->msg_type);
-	m->status = ntohs(m->status);
+	m->sys_status = ntohl(m->sys_status);
+	m->status = ntohl(m->status);
 	m->key = ntohl(m->key);
 	m->msg_num = ntohl(m->msg_num);
 }
@@ -101,7 +130,9 @@
 
 typedef struct _net_status_ctxt
 {
+	u8 target_node;
 	struct list_head list;
+	enum net_system_error sys_status;
 	s32 status;
 	u64 msg_num;
 	wait_queue_head_t wq;

Modified: trunk/fs/ocfs2/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/heartbeat.c	2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/heartbeat.c	2005-03-15 22:03:06 UTC (rev 1976)
@@ -50,7 +50,7 @@
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_HEARTBEAT
 
-#define OCFS2_HB_NODE_DOWN_PRI     (0x0000001)
+#define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
 #define OCFS2_HB_NODE_UP_PRI	   OCFS2_HB_NODE_DOWN_PRI
 
 static void ocfs2_hb_node_down_cb(struct inode *group,



More information about the Ocfs2-commits mailing list