[Ocfs2-commits] zab commits r1767 - trunk/cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Jan 13 15:20:29 CST 2005
Author: zab
Date: 2005-01-13 15:20:28 -0600 (Thu, 13 Jan 2005)
New Revision: 1767
Modified:
trunk/cluster/dlmmod.c
trunk/cluster/tcp.c
trunk/cluster/tcp.h
Log:
Use iovecs instead of our net_data attempt. Callers build their packets as an
iovec and pass it to net_send_message_iov (formerly net_send_message_arr). It
allocates its own iovec, one entry larger, so it can prepend the net_msg header.
It passes that iovec on through to sendmsg instead of trying to allocate a full
packet and copy all the data into it. The formerly duplicate net_send_message()
function just becomes a special-case wrapper around _iov that builds a
single-entry iovec.
Modified: trunk/cluster/dlmmod.c
===================================================================
--- trunk/cluster/dlmmod.c 2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/dlmmod.c 2005-01-13 21:20:28 UTC (rev 1767)
@@ -852,11 +852,9 @@
int tmpret;
dlm_status ret;
int status = 0;
+ struct iovec iov[2];
+ size_t iovlen = 1;
- int arrsz = 1, msgsz = sizeof(dlm_unlock_lock);
- net_data nd[2];
-
-
dlmprintk0("\n");
memset(&unlock, 0, sizeof(unlock));
@@ -866,15 +864,14 @@
unlock.namelen = res->lockname.len;
strncpy(unlock.name, res->lockname.name, unlock.namelen);
- nd[0].bytes = msgsz;
- nd[0].ptr = &unlock;
+ iov[0].iov_len = sizeof(dlm_unlock_lock);
+ iov[0].iov_base = &unlock;
if (flags & LKM_PUT_LVB) {
/* extra data to send if we are updating lvb */
- nd[1].bytes = DLM_LVB_LEN;
- nd[1].ptr = lock->lksb->lvb;
- arrsz++;
- msgsz += DLM_LVB_LEN;
+ iov[1].iov_len = DLM_LVB_LEN;
+ iov[1].iov_base = lock->lksb->lvb;
+ iovlen++;
}
ret = DLM_NOLOCKMGR;
@@ -882,7 +879,8 @@
inode = nm_get_group_node_by_index(dlm->group, res->owner);
if (inode) {
dlm_unlock_lock_to_net(&unlock);
- tmpret = net_send_message_arr(DLM_UNLOCK_LOCK_MSG, dlm->key, arrsz, nd, msgsz, inode, &status);
+ tmpret = net_send_message_iov(DLM_UNLOCK_LOCK_MSG, dlm->key,
+ iov, iovlen, inode, &status);
if (tmpret >= 0) {
// successfully sent and received
if (status == DLM_CANCELGRANT)
@@ -1414,10 +1412,11 @@
int dlm_send_proxy_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int type, int blocked_type)
{
- int ret = 0, arrsz = 1, msgsz = sizeof(dlm_proxy_ast);
+ int ret = 0;
dlm_proxy_ast past;
struct inode *inode = NULL;
- net_data nd[2];
+ struct iovec iov[2];
+ size_t iovlen = 1;
dlmprintk("to=%u, type=%d, blocked_type=%d\n", lock->node, type, blocked_type);
@@ -1429,21 +1428,21 @@
strncpy(past.name, res->lockname.name, past.namelen);
past.cookie = lock->cookie;
- nd[0].bytes = msgsz;
- nd[0].ptr = &past;
+ iov[0].iov_len = sizeof(dlm_proxy_ast);
+ iov[0].iov_base = &past;
if (lock->lksb->flags & DLM_LKSB_GET_LVB) {
past.flags |= LKM_GET_LVB;
- nd[1].bytes = DLM_LVB_LEN;
- nd[1].ptr = lock->lksb->lvb;
- arrsz++;
- msgsz += DLM_LVB_LEN;
+ iov[1].iov_len = DLM_LVB_LEN;
+ iov[1].iov_base = lock->lksb->lvb;
+ iovlen++;
}
ret = -EINVAL;
inode = nm_get_group_node_by_index(dlm->group, lock->node);
if (inode) {
dlm_proxy_ast_to_net(&past);
- ret = net_send_message_arr(DLM_PROXY_AST_MSG, dlm->key, arrsz, nd, msgsz, inode, NULL);
+ ret = net_send_message_iov(DLM_PROXY_AST_MSG, dlm->key,
+ iov, iovlen, inode, NULL);
iput(inode);
}
if (ret < 0) {
@@ -1691,8 +1690,8 @@
int tmpret;
dlm_status ret;
int status = 0;
- int arrsz = 1, msgsz = sizeof(dlm_convert_lock);
- net_data nd[2];
+ struct iovec iov[2];
+ size_t iovlen = 1;
dlmprintk0("\n");
@@ -1704,22 +1703,22 @@
convert.flags = flags;
strncpy(convert.name, res->lockname.name, convert.namelen);
- nd[0].bytes = msgsz;
- nd[0].ptr = &convert;
+ iov[0].iov_len = sizeof(dlm_convert_lock);
+ iov[0].iov_base = &convert;
if (flags & LKM_PUT_LVB) {
/* extra data to send if we are updating lvb */
- nd[1].bytes = DLM_LVB_LEN;
- nd[1].ptr = lock->lksb->lvb;
- arrsz++;
- msgsz += DLM_LVB_LEN;
+ iov[1].iov_len = DLM_LVB_LEN;
+ iov[1].iov_base = lock->lksb->lvb;
+ iovlen++;
}
ret = DLM_NOLOCKMGR;
inode = nm_get_group_node_by_index(dlm->group, res->owner);
if (inode) {
dlm_convert_lock_to_net(&convert);
- tmpret = net_send_message_arr(DLM_CONVERT_LOCK_MSG, dlm->key, arrsz, nd, msgsz, inode, &status);
+ tmpret = net_send_message_iov(DLM_CONVERT_LOCK_MSG, dlm->key,
+ iov, iovlen, inode, &status);
if (tmpret >= 0) {
// successfully sent and received
ret = status; // this is already a dlm_status
Modified: trunk/cluster/tcp.c
===================================================================
--- trunk/cluster/tcp.c 2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/tcp.c 2005-01-13 21:20:28 UTC (rev 1767)
@@ -39,8 +39,6 @@
* - if spin sending be sure to exclude concurrent racing senders
* - refcounting around sock against tx/teardown/etc
* - get sin/iov/msg off the stack, per sock structures
- * - share code between net_send_message{,arr}()
- * - net_send_tcp_msg() callers should pass an iov
* - just rely on rx calls to discover busted sockets?
* - make sk_state another define
* - add trivial version trading message at the start of a conn
@@ -786,7 +784,8 @@
return status;
} /* net_recv_tcp_msg */
-int net_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+static int net_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t size)
{
int ret = sock_sendmsg(sock, msg, size);
@@ -796,11 +795,8 @@
return ret;
}
-/*
- * net_send_tcp_msg()
- *
- */
-int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len)
+static int net_send_tcp_msg(struct inode *inode, struct socket *sock,
+ struct iovec *iov, size_t iovlen, size_t total)
{
int status = 0, error;
struct sockaddr_in sin;
@@ -827,13 +823,10 @@
status = -EINVAL;
if (sock) {
- struct iovec iov = {
- .iov_base = data,
- .iov_len = packet_len
- };
+
struct msghdr msg = {
- .msg_iov = &iov,
- .msg_iovlen = 1,
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
.msg_control = NULL,
.msg_controllen = 0,
.msg_name = (struct sockaddr *) &sin,
@@ -843,7 +836,7 @@
status = 0;
set_fs (get_ds ());
- error = net_sock_sendmsg (sock, &msg, packet_len);
+ error = net_sock_sendmsg (sock, &msg, total);
set_fs (oldfs);
if (error < 0) {
@@ -856,7 +849,7 @@
status = 0;
return status;
-} /* net_send_tcp_msg */
+}
static u64 net_next_msg_num(void)
{
@@ -871,164 +864,31 @@
return ret;
}
-/*
- * net_send_message
- *
- * - this is probably the function you are looking for
- * - it will package up the message for you, verifying that
- * the message handler is there and the length is ok,
- * connect to the other node if there is not already a
- * socket for it, and optionally wait on a status return
- * from the other node
- * - all you need prior to this call is to have inited the
- * net stuff, to have a valid inode for the node to contact
- * in nm, and to have registered the message handler
- * - if status was requested, it will be returned to the caller
- * already converted to host byteorder
- */
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status)
+int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
+ size_t caller_iovlen, struct inode *inode,
+ int *status)
{
- int ret = 0, tmpret;
+ int ret, tmpret;
net_msg *msg = NULL;
net_msg_handler *handler = NULL;
- u32 packet_len;
net_status_ctxt nsc;
wait_queue_t sleep;
nm_node_inode_private *priv = NULL;
net_inode_private *net = NULL;
+ size_t i, iovlen, caller_bytes = 0;
+ struct iovec *iov = NULL;
if (!inode || !inode->u.generic_ip) {
netprintk0("bad inode, cannot send message\n");
- return -EINVAL;
- }
- priv = (nm_node_inode_private *)inode->u.generic_ip;
- net = &priv->net;
-
- /* this pattern is awfully awkward */
- spin_lock(&net->sock_lock);
- if (!net->sock) {
- spin_unlock(&net->sock_lock);
- ret = net_init_tcp_sock(inode);
- if (!(ret == 0 || ret == -EEXIST)) {
- netprintk0("failed to create socket!");
- return -EINVAL;
- }
- spin_lock(&net->sock_lock);
- }
- spin_unlock(&net->sock_lock);
-
- handler = net_lookup_handler(msg_type, key);
-
- if (!handler) {
- netprintk("no such message type: %u/%u\n", msg_type, key);
- return -EINVAL;
- }
-
- if (!net_handler_msg_len_ok(handler, len)) {
- netprintk("len for message type %u incorrect: %u, should be %u\n",
- msg_type, len, handler->max_len);
ret = -EINVAL;
goto done;
}
- packet_len = len + sizeof(net_msg);
- msg = kmalloc(packet_len, GFP_KERNEL);
- if (!msg) {
- netprintk("failed to allocate %u bytes for message!\n", packet_len);
- ret = -ENOMEM;
+ if (caller_iovlen == 0) {
+ netprintk0("bad iovec array length\n");
+ ret = -EINVAL;
goto done;
}
- memset(msg, 0, packet_len);
- msg->magic = NET_MSG_MAGIC;
- msg->data_len = len;
- msg->msg_type = msg_type;
- msg->key = key;
- msg->msg_num = net_next_msg_num();
- if (len > 0)
- memcpy(&(msg->buf[0]), data, len);
-
- /* does the caller want to wait for a simple status? */
- if (status) {
- msg->status = 1;
-
- INIT_LIST_HEAD(&nsc.list);
- init_waitqueue_head(&nsc.wq);
- atomic_set(&nsc.woken, 0);
- nsc.msg_num = msg->msg_num;
- nsc.status = 0;
- spin_lock(&net_status_lock);
- list_add(&nsc.list, &net_status_list);
- spin_unlock(&net_status_lock);
-
- init_waitqueue_entry(&sleep, current);
- spin_lock(&net->sock_lock);
- if (!net->sock) {
- spin_unlock(&net->sock_lock);
- netprintk0("caller wanted status return but socket went away!\n");
- kfree(msg);
- return -EINVAL;
- }
- add_wait_queue(net->sock->sk->sk_sleep, &sleep);
- spin_unlock(&net->sock_lock);
- }
-
- /* finally, convert the message header to network byte-order and send */
- net_msg_to_net(msg);
- ret = net_send_tcp_msg(inode, NULL, msg, packet_len);
-
- if (status) {
- if (ret >= 0) {
- /* wait on other node's handler */
- tmpret = util_wait_atomic_eq(&nsc.wq, &nsc.woken, 1, 0);
- if (tmpret==0) {
- *status = nsc.status;
- netprintk("status return requested, status is %d\n", *status);
- } else {
- ret = tmpret;
- net_abort_status_return(&nsc);
- netprintk0("net_abort_status_return called\n");
- netprintk("status return requested, and error occurred while waiting=%d\n", ret);
- *status = ret;
- }
- remove_wait_queue(recv_sock->sk->sk_sleep, &sleep);
- } else {
- netprintk("status return requested, and error returned from net_send_tcp_msg=%d\n", ret);
- /* return bad status right away */
- *status = ret;
- }
- } else if (ret < 0) {
- netprintk("no status return requested, but error returned from net_send_tcp_msg=%d\n", ret);
- }
-
-done:
- if (handler)
- net_put_handler(handler);
- if (msg)
- kfree(msg);
- return ret;
-}
-EXPORT_SYMBOL(net_send_message);
-
-int net_send_message_arr(u32 msg_type, u32 key, int arrlen, net_data *arr, u32 len, struct inode *inode, int *status)
-{
- int ret = 0, tmpret, i;
- net_msg *msg = NULL;
- net_msg_handler *handler = NULL;
- u32 packet_len;
- net_status_ctxt nsc;
- wait_queue_t sleep;
- nm_node_inode_private *priv = NULL;
- net_inode_private *net = NULL;
- char *src, *dest;
-
- if (!inode || !inode->u.generic_ip) {
- netprintk0("bad inode, cannot send message\n");
- return -EINVAL;
- }
- if (arrlen <= 0) {
- netprintk0("bad data array length\n");
- return -EINVAL;
- }
priv = (nm_node_inode_private *)inode->u.generic_ip;
net = &priv->net;
spin_lock(&net->sock_lock);
@@ -1037,54 +897,55 @@
ret = net_init_tcp_sock(inode);
if (!(ret == 0 || ret == -EEXIST)) {
netprintk0("failed to create socket!");
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
spin_lock(&net->sock_lock);
}
spin_unlock(&net->sock_lock);
handler = net_lookup_handler(msg_type, key);
-
if (!handler) {
netprintk("no such message type: %u/%u\n", msg_type, key);
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
- if (!net_handler_msg_len_ok(handler, len)) {
+ for(i = 0; i < caller_iovlen; i++)
+ caller_bytes += caller_iov[i].iov_len;
+
+ if (!net_handler_msg_len_ok(handler, caller_bytes)) {
netprintk("len for message type %u incorrect: %u, should be %u\n",
- msg_type, len, handler->max_len);
+ msg_type, caller_bytes, handler->max_len);
ret = -EINVAL;
goto done;
}
- packet_len = len + sizeof(net_msg);
- msg = kmalloc(packet_len, GFP_KERNEL);
+
+ /* build up our iovec */
+ iovlen = caller_iovlen + 1;
+ iov = kmalloc(sizeof(struct iovec) * iovlen, GFP_KERNEL);
+ if (iov == NULL) {
+ netprintk("failed to %zu element iovec!\n", iovlen);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ msg = kmalloc(sizeof(net_msg), GFP_KERNEL);
if (!msg) {
- netprintk("failed to allocate %u bytes for message!\n", packet_len);
+ netprintk("failed to allocate a net_msg!\n");
ret = -ENOMEM;
goto done;
}
- memset(msg, 0, packet_len);
+ memset(msg, 0, sizeof(net_msg));
msg->magic = NET_MSG_MAGIC;
- msg->data_len = len;
+ msg->data_len = caller_bytes;
msg->msg_type = msg_type;
msg->key = key;
msg->msg_num = net_next_msg_num();
- if (len > 0) {
- int tmplen = len;
- dest = &(msg->buf[0]);
- for (i=0; i<arrlen; i++) {
- src = arr[i].ptr;
- if (arr[i].bytes > tmplen) {
- netprintk0("data array is too large!\n");
- kfree(msg);
- return -EINVAL;
- }
- memcpy(dest, src, arr[i].bytes);
- tmplen -= arr[i].bytes;
- dest += arr[i].bytes;
- }
- }
+ iov[0].iov_len = sizeof(net_msg);
+ iov[0].iov_base = msg;
+ memcpy(&iov[1], caller_iov, caller_iovlen * sizeof(struct iovec));
/* does the caller want to wait for a simple status? */
if (status) {
@@ -1113,7 +974,8 @@
/* finally, convert the message header to network byte-order and send */
net_msg_to_net(msg);
- ret = net_send_tcp_msg(inode, NULL, msg, packet_len);
+ ret = net_send_tcp_msg(inode, NULL, iov, iovlen,
+ sizeof(net_msg) + caller_bytes);
if (status) {
if (ret >= 0) {
@@ -1142,16 +1004,65 @@
done:
if (handler)
net_put_handler(handler);
+ if (iov)
+ kfree(iov);
if (msg)
kfree(msg);
return ret;
}
-EXPORT_SYMBOL(net_send_message_arr);
+EXPORT_SYMBOL(net_send_message_iov);
+/*
+ * net_send_message
+ *
+ * - this is probably the function you are looking for
+ * - it will package up the message for you, verifying that
+ * the message handler is there and the length is ok,
+ * connect to the other node if there is not already a
+ * socket for it, and optionally wait on a status return
+ * from the other node
+ * - all you need prior to this call is to have inited the
+ * net stuff, to have a valid inode for the node to contact
+ * in nm, and to have registered the message handler
+ * - if status was requested, it will be returned to the caller
+ * already converted to host byteorder
+ */
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
+ struct inode *inode, int *status)
+{
+ struct iovec iov = {
+ .iov_base = data,
+ .iov_len = len,
+ };
+ return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
+}
+EXPORT_SYMBOL(net_send_message);
+static int net_send_status_magic(struct inode *inode, struct socket *sock,
+ net_msg *hdr, int err)
+{
+ struct iovec iov = {
+ .iov_base = hdr,
+ .iov_len = sizeof(net_msg),
+ };
+
+#ifdef BIG_NET_MSG
+ {
+ u16 n = hdr->src_node;
+ hdr->src_node = hdr->dst_node;
+ hdr->dst_node = n;
+ }
+#endif
+ hdr->status = err;
+ hdr->magic = NET_MSG_STATUS_MAGIC; // twiddle the magic
+ hdr->data_len = 0;
+ netprintk("about to send status %d\n", err);
-
+ /* hdr has been in host byteorder this whole time */
+ net_msg_to_net(hdr);
+ return net_send_tcp_msg(inode, sock, &iov, 1, sizeof(net_msg));
+}
/*
* net_receive: receive from and dispatch all sockets with data pending
*/
@@ -1247,19 +1158,7 @@
/* if node has requested status return, do it now */
if (hdr.status) {
-#ifdef BIG_NET_MSG
- u16 n = hdr.src_node;
- hdr.src_node = hdr.dst_node;
- hdr.dst_node = n;
-#endif
- hdr.status = err;
- hdr.magic = NET_MSG_STATUS_MAGIC; // twiddle the magic
- hdr.data_len = 0;
- netprintk("about to send status %d\n", err);
-
- /* hdr has been in host byteorder this whole time */
- net_msg_to_net(&hdr);
- tmperr = net_send_tcp_msg(inode, sock, &hdr, sizeof(net_msg));
+ tmperr = net_send_status_magic(inode, sock, &hdr, err);
netprintk0("yay, sent!\n");
} else if (err < 0) {
netprintk("dispatch (%u/%u) returned %d\n",
Modified: trunk/cluster/tcp.h
===================================================================
--- trunk/cluster/tcp.h 2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/tcp.h 2005-01-13 21:20:28 UTC (rev 1767)
@@ -233,20 +233,14 @@
NET_DRIVER_READY,
};
-typedef struct _net_data
-{
- int bytes;
- void *ptr;
-} net_data;
-
int net_register_handler(u32 msg_type, u32 key, int flags,
u32 max_len, net_msg_handler_func *func, void *data, void *buf);
int net_recv_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 *packet_len);
-int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len);
int net_send_error(struct socket *sock, u32 err_type);
int net_init_tcp_sock(struct inode *inode);
int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
-int net_send_message_arr(u32 msg_type, u32 key, int arrlen, net_data *arr, u32 len, struct inode *inode, int *status);
+int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
+ size_t iovlen, struct inode *inode, int *status);
int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
More information about the Ocfs2-commits
mailing list