[Ocfs2-commits] mfasheh commits r1976 - in trunk/fs/ocfs2: . cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue Mar 15 16:03:08 CST 2005
Author: mfasheh
Signed-off-by: zab
Date: 2005-03-15 16:03:06 -0600 (Tue, 15 Mar 2005)
New Revision: 1976
Modified:
trunk/fs/ocfs2/cluster/tcp.c
trunk/fs/ocfs2/cluster/tcp.h
trunk/fs/ocfs2/heartbeat.c
Log:
* Rework the tcp stuff so that all messages send a status return. This
allows us to detect errors on receiving nodes and handle them
appropriately at the sending side.
* use wait_event instead of wait_event_interruptible in
net_send_message_iov. Once the message has been sent, the wait for its
status return must not be interruptible by a signal; otherwise things
like dlm state on receiving nodes will get messed up.
* hook the tcp stuff into heartbeat so that it can throw an error at any
processes waiting for a response from a node which just died.
* add a couple of debug prints which aided me in tracing paths through
the code.
Signed-off-by: zab
Modified: trunk/fs/ocfs2/cluster/tcp.c
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.c 2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/cluster/tcp.c 2005-03-15 22:03:06 UTC (rev 1976)
@@ -169,6 +169,9 @@
spin_unlock(&net_status_lock);
}
+static int net_register_hb_callbacks(void);
+static void net_unregister_hb_callbacks(void);
+
/////////////////////
static void net_shutdown(void);
static int net_startup(void);
@@ -228,7 +231,10 @@
if (!de)
return -1;
de->proc_fops->ioctl = net_ioctl;
-
+
+ if (net_register_hb_callbacks())
+ return -1;
+
netprintk0("Loaded net Driver module\n");
return 0;
} /* net_driver_entry */
@@ -347,6 +353,7 @@
}
up(&net_state_lock);
remove_proc_entry("cluster/net", NULL);
+ net_unregister_hb_callbacks();
netprintk0("Unloading net driver module\n");
return;
} /* net_driver_exit */
@@ -609,7 +616,8 @@
size_t caller_iovlen, struct inode *inode,
int *status)
{
- int ret, tmpret;
+ int ret;
+ int cleanup_wq = 0;
net_msg *msg = NULL;
net_status_ctxt nsc;
wait_queue_t sleep;
@@ -617,6 +625,8 @@
struct iovec *iov = NULL;
struct socket *sock = NULL;
+ BUG_ON(current == net_recv_task);
+
if (!inode || !inode->u.generic_ip) {
netprintk0("bad inode, cannot send message\n");
ret = -EINVAL;
@@ -641,6 +651,8 @@
if (ret)
goto out;
+ netprintk0("returned from net_sock_addref_or_connect, building msg\n");
+
/* build up our iovec */
iovlen = caller_iovlen + 1;
iov = kmalloc(sizeof(struct iovec) * iovlen, GFP_KERNEL);
@@ -660,6 +672,8 @@
msg->magic = NET_MSG_MAGIC;
msg->data_len = caller_bytes;
msg->msg_type = msg_type;
+ msg->sys_status = NET_ERR_NONE;
+ msg->status = 0;
msg->key = key;
msg->msg_num = net_next_msg_num();
@@ -667,56 +681,52 @@
iov[0].iov_base = msg;
memcpy(&iov[1], caller_iov, caller_iovlen * sizeof(struct iovec));
- /* does the caller want to wait for a simple status? */
- if (status) {
- msg->status = 1;
+ /* Setup for status return wait */
+ init_waitqueue_head(&nsc.wq);
+ atomic_set(&nsc.woken, 0);
+ nsc.msg_num = msg->msg_num;
+ nsc.sys_status = NET_ERR_NONE;
+ nsc.status = 0;
+ /* XXX: Should be using group index here. */
+ nsc.target_node = nm_get_node_global_index(inode);
- init_waitqueue_head(&nsc.wq);
- atomic_set(&nsc.woken, 0);
- nsc.msg_num = msg->msg_num;
- nsc.status = 0;
+ init_waitqueue_entry(&sleep, current);
+ add_wait_queue(&nsc.wq, &sleep);
+ cleanup_wq = 1;
- init_waitqueue_entry(&sleep, current);
- add_wait_queue(&nsc.wq, &sleep);
+ spin_lock(&net_status_lock);
+ list_add_tail(&nsc.list, &net_status_list);
+ spin_unlock(&net_status_lock);
- spin_lock(&net_status_lock);
- list_add_tail(&nsc.list, &net_status_list);
- spin_unlock(&net_status_lock);
- }
-
- /* finally, convert the message header to network byte-order and send */
+ /* finally, convert the message header to network byte-order
+ * and send */
net_msg_to_net(msg);
ret = net_send_tcp_msg(sock, iov, iovlen,
sizeof(net_msg) + caller_bytes);
net_msg_to_host(msg); /* just swapping for printk, its unused now */
msgprintk(msg, "sending returned %d\n", ret);
-
- if (status) {
- if (ret >= 0) {
- /* wait on other node's handler */
- tmpret = wait_event_interruptible(nsc.wq,
- (atomic_read(&nsc.woken) == 1));
- if (tmpret==0) {
- *status = nsc.status;
- netprintk("status return requested, status is %d\n", *status);
- } else {
- ret = tmpret;
- net_abort_status_return(&nsc);
- netprintk0("net_abort_status_return called\n");
- netprintk("status return requested, and error occurred while waiting=%d\n", ret);
- *status = ret;
- }
- } else {
- netprintk("status return requested, and error returned from net_send_tcp_msg=%d\n", ret);
- /* return bad status right away */
- *status = ret;
- }
- remove_wait_queue(&nsc.wq, &sleep);
- } else if (ret < 0) {
- netprintk("no status return requested, but error returned from net_send_tcp_msg=%d\n", ret);
+ if (ret < 0) {
+ net_abort_status_return(&nsc);
+ netprintk("error returned from net_send_tcp_msg=%d\n", ret);
+ goto out;
}
+ /* wait on other node's handler */
+ wait_event(nsc.wq, (atomic_read(&nsc.woken) == 1));
+
+ /* Note that we avoid overwriting the callers status return
+ * variable if a system error was reported on the other
+ * side. Callers beware. */
+ ret = net_sys_err_to_errno(nsc.sys_status);
+ if (status && !ret)
+ *status = nsc.status;
+
+ netprintk("woken, returning system status %d, user status %d",
+ ret, nsc.status);
+
out:
+ if (cleanup_wq)
+ remove_wait_queue(&nsc.wq, &sleep);
if (sock)
net_sock_decref(inode, ret);
if (iov)
@@ -741,6 +751,7 @@
* in nm, and to have registered the message handler
* - if status was requested, it will be returned to the caller
* already converted to host byteorder
+ * - status will not be set on return code != 0
*/
int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
struct inode *inode, int *status)
@@ -753,15 +764,19 @@
}
EXPORT_SYMBOL(net_send_message);
-static int net_send_status_magic(struct socket *sock, net_msg *hdr, int err)
+static int net_send_status_magic(struct socket *sock, net_msg *hdr,
+ enum net_system_error syserr, int err)
{
struct iovec iov = {
.iov_base = hdr,
.iov_len = sizeof(net_msg),
};
-
+
+ BUG_ON(syserr >= NET_ERR_MAX);
+
/* leave other fields intact from the incoming message, msg_num
* in particular */
+ hdr->sys_status = syserr;
hdr->status = err;
hdr->magic = NET_MSG_STATUS_MAGIC; // twiddle the magic
hdr->data_len = 0;
@@ -1008,6 +1023,7 @@
list_for_each(iter, &net_status_list) {
nsc = list_entry(iter, net_status_ctxt, list);
if (nsc->msg_num == hdr->msg_num) {
+ nsc->sys_status = hdr->sys_status;
nsc->status = hdr->status;
atomic_set(&nsc->woken, 1);
list_del_init(&nsc->list);
@@ -1021,11 +1037,72 @@
msgprintk(hdr, "sent to nsc %p\n", nsc);
}
+static void net_kill_node_messages(u8 node)
+{
+ unsigned int num_kills = 0;
+ net_status_ctxt *nsc = NULL;
+ struct list_head *iter;
+
+ spin_lock(&net_status_lock);
+ list_for_each(iter, &net_status_list) {
+ nsc = list_entry(iter, net_status_ctxt, list);
+ if (nsc->target_node == node) {
+ nsc->sys_status = NET_ERR_DIED;
+ atomic_set(&nsc->woken, 1);
+ list_del_init(&nsc->list);
+ wake_up(&nsc->wq);
+
+ num_kills++;
+ }
+ }
+ spin_unlock(&net_status_lock);
+
+ netprintk("node %u died, killed %d messages\n", node, num_kills);
+}
+
+static void net_hb_node_down_cb(struct inode *group,
+ struct inode *node,
+ int node_num,
+ void *data)
+{
+ net_kill_node_messages(node_num);
+}
+
+static struct hb_callback_func *net_hb_down = NULL;
+#define NET_HB_NODE_DOWN_PRI (0x1)
+
+static int net_register_hb_callbacks(void)
+{
+ net_hb_down = kmalloc(sizeof(*net_hb_down), GFP_KERNEL);
+ if (!net_hb_down)
+ return -ENOMEM;
+ memset(net_hb_down, 0, sizeof(*net_hb_down));
+
+ hb_setup_callback(net_hb_down, HB_NODE_DOWN_CB,
+ net_hb_node_down_cb, NULL, NET_HB_NODE_DOWN_PRI);
+ return hb_register_callback(net_hb_down);
+}
+
+static void net_unregister_hb_callbacks(void)
+{
+ int status;
+
+ if (net_hb_down) {
+ status = hb_unregister_callback(net_hb_down);
+ if (status < 0)
+ printk("ocfs2_tcp: Status return %d unregistering "
+ "heartbeat callback!\n", status);
+ kfree(net_hb_down);
+ net_hb_down = NULL;
+ }
+}
+
/* this returns -errno if the header was unknown or too large, etc.
* after this is called the buffer us reused for the next message */
static int net_process_message(struct socket *sock, net_msg *hdr)
{
- int ret;
+ int ret, tmpret;
+ enum net_system_error syserr;
net_msg_handler *hnd = NULL;
netprintk("received message header... magic=%u type=%u key=%u\n",
@@ -1054,39 +1131,33 @@
}
/* find a handler for it */
+ ret = 0;
hnd = net_lookup_handler(hdr->msg_type, hdr->key);
if (!hnd) {
- ret = -EINVAL;
- msgprintk0(hdr, "no handler for message.\n");
- goto out;
+ syserr = NET_ERR_NO_HNDLR;
+ goto out_respond;
}
- ret = 0;
+ syserr = NET_ERR_NONE;
+
spin_lock(&hnd->lock);
if (hdr->data_len > hnd->max_len)
- ret = -EOVERFLOW;
+ syserr = NET_ERR_OVERFLOW;
spin_unlock(&hnd->lock);
- if (ret) {
- msgprintk(hdr, "advertised data_len > handlers max_len (%u)\n",
- hnd->max_len);
- goto out;
- }
+ if (syserr != NET_ERR_NONE)
+ goto out_respond;
+
net_num_dispatched++;
ret = (hnd->func)(hdr, sizeof(net_msg) + hdr->data_len, hnd->data);
-
- /* if node has requested status return, do it now */
- if (hdr->status) {
- int tmpret;
- /* this destroys the hdr, so don't use it after this */
- tmpret = net_send_status_magic(sock, hdr, ret);
- hdr = NULL;
- netprintk("sending status %d returned %d\n", ret, tmpret);
- ret = 0;
- } else if (ret < 0) {
- msgprintk(hdr, "dispatch returned %d\n", ret);
- }
+out_respond:
+ /* this destroys the hdr, so don't use it after this */
+ tmpret = net_send_status_magic(sock, hdr, syserr, ret);
+ hdr = NULL;
+ netprintk("sending status %d, syserr %d returned %d\n", ret, syserr,
+ tmpret);
+
out:
if (hnd)
net_put_handler(hnd);
@@ -1390,6 +1461,8 @@
sock = net->sock;
net->sock_refs++;
} else {
+ netprintk("Initiating the connect!\n");
+
if (!net->sock_pending) {
/* ok, we'll be initiating the connect */
net->sock_pending = 1;
Modified: trunk/fs/ocfs2/cluster/tcp.h
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.h 2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/cluster/tcp.h 2005-03-15 22:03:06 UTC (rev 1976)
@@ -52,6 +52,31 @@
#define NET_DISP_THREAD_MS 5000 /* TODO */
#define NET_RECV_THREAD_MS 5000 /* TODO */
+enum net_system_error {
+ NET_ERR_NONE = 0,
+ NET_ERR_NO_HNDLR,
+ NET_ERR_OVERFLOW,
+ NET_ERR_DIED,
+ NET_ERR_MAX
+};
+
+static int net_sys_err_translations[NET_ERR_MAX] =
+ {[NET_ERR_NONE] = 0,
+ [NET_ERR_NO_HNDLR] = -ENOPROTOOPT,
+ [NET_ERR_OVERFLOW] = -EOVERFLOW,
+ [NET_ERR_DIED] = -EHOSTDOWN,};
+
+static inline int net_sys_err_to_errno(enum net_system_error err)
+{
+ int trans;
+ BUG_ON(err >= NET_ERR_MAX);
+ trans = net_sys_err_translations[err];
+
+ /* Just in case we mess up the translation table above */
+ BUG_ON(err != NET_ERR_NONE && trans == 0);
+ return trans;
+}
+
#define NET_MSG_MAGIC ((u16)0xfa55)
#define NET_MSG_STATUS_MAGIC ((u16)0xfa56)
typedef struct _net_msg
@@ -59,7 +84,9 @@
__u16 magic;
__u16 data_len;
__u16 msg_type;
- __s16 status;
+ __u16 pad1;
+ __u32 sys_status;
+ __s32 status;
__u32 key;
__u32 msg_num;
__u8 buf[0];
@@ -70,7 +97,8 @@
m->magic = htons(m->magic);
m->data_len = htons(m->data_len);
m->msg_type = htons(m->msg_type);
- m->status = htons(m->status);
+ m->sys_status = htonl(m->sys_status);
+ m->status = htonl(m->status);
m->key = htonl(m->key);
m->msg_num = htonl(m->msg_num);
}
@@ -79,7 +107,8 @@
m->magic = ntohs(m->magic);
m->data_len = ntohs(m->data_len);
m->msg_type = ntohs(m->msg_type);
- m->status = ntohs(m->status);
+ m->sys_status = ntohl(m->sys_status);
+ m->status = ntohl(m->status);
m->key = ntohl(m->key);
m->msg_num = ntohl(m->msg_num);
}
@@ -101,7 +130,9 @@
typedef struct _net_status_ctxt
{
+ u8 target_node;
struct list_head list;
+ enum net_system_error sys_status;
s32 status;
u64 msg_num;
wait_queue_head_t wq;
Modified: trunk/fs/ocfs2/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/heartbeat.c 2005-03-15 20:34:47 UTC (rev 1975)
+++ trunk/fs/ocfs2/heartbeat.c 2005-03-15 22:03:06 UTC (rev 1976)
@@ -50,7 +50,7 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HEARTBEAT
-#define OCFS2_HB_NODE_DOWN_PRI (0x0000001)
+#define OCFS2_HB_NODE_DOWN_PRI (0x0000002)
#define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI
static void ocfs2_hb_node_down_cb(struct inode *group,
More information about the Ocfs2-commits
mailing list