[Ocfs2-commits] zab commits r1767 - trunk/cluster

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Jan 13 15:20:29 CST 2005


Author: zab
Date: 2005-01-13 15:20:28 -0600 (Thu, 13 Jan 2005)
New Revision: 1767

Modified:
   trunk/cluster/dlmmod.c
   trunk/cluster/tcp.c
   trunk/cluster/tcp.h
Log:
Use iovecs instead of our net_data attempt.  Callers build their packets as an
iovec and pass it to net_send_message_iov (formerly _arr).  That function
allocates its own iovec, one entry larger, so it can prepend the net_msg
header, and passes the iovec through to sendmsg instead of allocating a full
packet and copying all the data into it.  The formerly duplicate
net_send_message() function becomes a special-case wrapper around _iov that
builds a single-entry iovec.


Modified: trunk/cluster/dlmmod.c
===================================================================
--- trunk/cluster/dlmmod.c	2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/dlmmod.c	2005-01-13 21:20:28 UTC (rev 1767)
@@ -852,11 +852,9 @@
 	int tmpret;
 	dlm_status ret;
 	int status = 0;
+	struct iovec iov[2];
+	size_t iovlen = 1;
 
-	int arrsz = 1, msgsz = sizeof(dlm_unlock_lock);
-	net_data nd[2];
-
-
 	dlmprintk0("\n");
 
 	memset(&unlock, 0, sizeof(unlock));
@@ -866,15 +864,14 @@
 	unlock.namelen = res->lockname.len;
 	strncpy(unlock.name, res->lockname.name, unlock.namelen);
 
-	nd[0].bytes = msgsz;
-	nd[0].ptr = &unlock;
+	iov[0].iov_len = sizeof(dlm_unlock_lock);
+	iov[0].iov_base = &unlock;
 
 	if (flags & LKM_PUT_LVB) {
 		/* extra data to send if we are updating lvb */
-		nd[1].bytes = DLM_LVB_LEN;
-		nd[1].ptr = lock->lksb->lvb;
-		arrsz++;
-		msgsz += DLM_LVB_LEN;
+		iov[1].iov_len = DLM_LVB_LEN;
+		iov[1].iov_base = lock->lksb->lvb;
+		iovlen++;
 	}
 
 	ret = DLM_NOLOCKMGR;
@@ -882,7 +879,8 @@
 	inode = nm_get_group_node_by_index(dlm->group, res->owner);
 	if (inode) {
 		dlm_unlock_lock_to_net(&unlock);
-		tmpret = net_send_message_arr(DLM_UNLOCK_LOCK_MSG, dlm->key, arrsz, nd, msgsz, inode, &status);
+		tmpret = net_send_message_iov(DLM_UNLOCK_LOCK_MSG, dlm->key,
+					      iov, iovlen, inode, &status);
 		if (tmpret >= 0) {
 			// successfully sent and received
 			if (status == DLM_CANCELGRANT)
@@ -1414,10 +1412,11 @@
 
 int dlm_send_proxy_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int type, int blocked_type)
 {
-	int ret = 0, arrsz = 1, msgsz = sizeof(dlm_proxy_ast);
+	int ret = 0;
 	dlm_proxy_ast past;
 	struct inode *inode = NULL;
-	net_data nd[2];
+	struct iovec iov[2];
+	size_t iovlen = 1;
 	
 	dlmprintk("to=%u, type=%d, blocked_type=%d\n", lock->node, type, blocked_type);
 
@@ -1429,21 +1428,21 @@
 	strncpy(past.name, res->lockname.name, past.namelen);
 	past.cookie = lock->cookie;
 
-	nd[0].bytes = msgsz;
-	nd[0].ptr = &past;
+	iov[0].iov_len = sizeof(dlm_proxy_ast);
+	iov[0].iov_base = &past;
 	if (lock->lksb->flags & DLM_LKSB_GET_LVB) {
 		past.flags |= LKM_GET_LVB;
-		nd[1].bytes = DLM_LVB_LEN;
-		nd[1].ptr = lock->lksb->lvb;
-		arrsz++;
-		msgsz += DLM_LVB_LEN;
+		iov[1].iov_len = DLM_LVB_LEN;
+		iov[1].iov_base = lock->lksb->lvb;
+		iovlen++;
 	}
 
 	ret = -EINVAL;
 	inode = nm_get_group_node_by_index(dlm->group, lock->node);
 	if (inode) {
 		dlm_proxy_ast_to_net(&past);
-		ret = net_send_message_arr(DLM_PROXY_AST_MSG, dlm->key, arrsz, nd, msgsz, inode, NULL);
+		ret = net_send_message_iov(DLM_PROXY_AST_MSG, dlm->key,
+					   iov, iovlen, inode, NULL);
 		iput(inode);
 	}
 	if (ret < 0) {
@@ -1691,8 +1690,8 @@
 	int tmpret;
 	dlm_status ret;
 	int status = 0;
-	int arrsz = 1, msgsz = sizeof(dlm_convert_lock);
-	net_data nd[2];
+	struct iovec iov[2];
+	size_t iovlen = 1;
 
 	dlmprintk0("\n");
 
@@ -1704,22 +1703,22 @@
 	convert.flags = flags;
 	strncpy(convert.name, res->lockname.name, convert.namelen);
 	
-	nd[0].bytes = msgsz;
-	nd[0].ptr = &convert;
+	iov[0].iov_len = sizeof(dlm_convert_lock);
+	iov[0].iov_base = &convert;
 
 	if (flags & LKM_PUT_LVB) {
 		/* extra data to send if we are updating lvb */
-		nd[1].bytes = DLM_LVB_LEN;
-		nd[1].ptr = lock->lksb->lvb;
-		arrsz++;
-		msgsz += DLM_LVB_LEN;
+		iov[1].iov_len = DLM_LVB_LEN;
+		iov[1].iov_base = lock->lksb->lvb;
+		iovlen++;
 	}
 
 	ret = DLM_NOLOCKMGR;
 	inode = nm_get_group_node_by_index(dlm->group, res->owner);
 	if (inode) {
 		dlm_convert_lock_to_net(&convert);
-		tmpret = net_send_message_arr(DLM_CONVERT_LOCK_MSG, dlm->key, arrsz, nd, msgsz, inode, &status);
+		tmpret = net_send_message_iov(DLM_CONVERT_LOCK_MSG, dlm->key,
+					      iov, iovlen, inode, &status);
 		if (tmpret >= 0) {
 			// successfully sent and received
 			ret = status;  // this is already a dlm_status

Modified: trunk/cluster/tcp.c
===================================================================
--- trunk/cluster/tcp.c	2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/tcp.c	2005-01-13 21:20:28 UTC (rev 1767)
@@ -39,8 +39,6 @@
  * 	- if spin sending be sure to exclude concurrent racing senders
  * 	- refcounting around sock against tx/teardown/etc
  * 	- get sin/iov/msg off the stack, per sock structures
- * 	- share code between net_send_message{,arr}() 
- * 	- net_send_tcp_msg() callers should pass an iov
  * 	- just rely on rx calls to discover busted sockets?
  * 	- make sk_state another define
  * 	- add trivial version trading message at the start of a conn
@@ -786,7 +784,8 @@
 	return status;
 }				/* net_recv_tcp_msg */
 
-int net_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+static int net_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size)
 {
 	int ret = sock_sendmsg(sock, msg, size);
 
@@ -796,11 +795,8 @@
 	return ret;
 }
 
-/*
- * net_send_tcp_msg()
- *
- */
-int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len)
+static int net_send_tcp_msg(struct inode *inode, struct socket *sock, 
+			    struct iovec *iov, size_t iovlen, size_t total)
 {
 	int status = 0, error;
 	struct sockaddr_in sin;
@@ -827,13 +823,10 @@
 
 	status = -EINVAL;
 	if (sock) {
-		struct iovec iov = {
-			.iov_base = data,
-			.iov_len = packet_len
-		};
+
 		struct msghdr msg = {
-			.msg_iov = &iov,
-			.msg_iovlen = 1,
+			.msg_iov = iov,
+			.msg_iovlen = iovlen,
 			.msg_control = NULL,
 			.msg_controllen = 0,
 			.msg_name = (struct sockaddr *) &sin,
@@ -843,7 +836,7 @@
 		
 		status = 0;	
 		set_fs (get_ds ());
-		error = net_sock_sendmsg (sock, &msg, packet_len);
+		error = net_sock_sendmsg (sock, &msg, total);
 		set_fs (oldfs);
 	
 		if (error < 0) {
@@ -856,7 +849,7 @@
 
 	status = 0;
 	return status;
-}				/* net_send_tcp_msg */
+}
 
 static u64 net_next_msg_num(void)
 {
@@ -871,164 +864,31 @@
 	return ret;
 }
 
-/*
- * net_send_message
- *
- *   - this is probably the function you are looking for
- *   - it will package up the message for you, verifying that
- *       the message handler is there and the length is ok,
- *       connect to the other node if there is not already a
- *       socket for it, and optionally wait on a status return
- *       from the other node 
- *   - all you need prior to this call is to have inited the
- *       net stuff, to have a valid inode for the node to contact 
- *       in nm, and to have registered the message handler
- *   - if status was requested, it will be returned to the caller
- *       already converted to host byteorder
- */
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status)
+int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
+			 size_t caller_iovlen, struct inode *inode,
+			 int *status)
 {
-	int ret = 0, tmpret;
+	int ret, tmpret;
 	net_msg *msg = NULL;
 	net_msg_handler *handler = NULL;
-	u32 packet_len;
 	net_status_ctxt nsc;
 	wait_queue_t sleep;
 	nm_node_inode_private *priv = NULL;
 	net_inode_private *net = NULL;
+	size_t i, iovlen, caller_bytes = 0;
+	struct iovec *iov = NULL;
 
 	if (!inode || !inode->u.generic_ip) {
 		netprintk0("bad inode, cannot send message\n");
-		return -EINVAL;
-	}
-	priv = (nm_node_inode_private *)inode->u.generic_ip;
-	net = &priv->net;
-
-	/* this pattern is awfully awkward */
-	spin_lock(&net->sock_lock); 
-	if (!net->sock) {
-		spin_unlock(&net->sock_lock);
-		ret = net_init_tcp_sock(inode);
-		if (!(ret == 0 || ret == -EEXIST)) {
-			netprintk0("failed to create socket!");
-			return -EINVAL;
-		}
-		spin_lock(&net->sock_lock);
-	}
-	spin_unlock(&net->sock_lock); 
-	
-	handler = net_lookup_handler(msg_type, key);
-	
-	if (!handler) {
-		netprintk("no such message type: %u/%u\n", msg_type, key);
-		return -EINVAL;
-	}
-
-	if (!net_handler_msg_len_ok(handler, len)) {
-		netprintk("len for message type %u incorrect: %u, should be %u\n", 
-		       msg_type, len, handler->max_len);
 		ret = -EINVAL;
 		goto done;
 	}
-	packet_len = len + sizeof(net_msg);
-	msg = kmalloc(packet_len, GFP_KERNEL);
-	if (!msg) {
-		netprintk("failed to allocate %u bytes for message!\n", packet_len);
-		ret = -ENOMEM;
+	if (caller_iovlen == 0) {
+		netprintk0("bad iovec array length\n");
+		ret = -EINVAL;
 		goto done;
 	}
-	memset(msg, 0, packet_len);
-	msg->magic = NET_MSG_MAGIC;
-	msg->data_len = len;
-	msg->msg_type = msg_type;
-	msg->key = key;
-	msg->msg_num = net_next_msg_num();
 
-	if (len > 0)
-		memcpy(&(msg->buf[0]), data, len);
-
-	/* does the caller want to wait for a simple status? */
-	if (status) {
-		msg->status = 1;
-
-		INIT_LIST_HEAD(&nsc.list);
-		init_waitqueue_head(&nsc.wq);
-		atomic_set(&nsc.woken, 0);
-		nsc.msg_num = msg->msg_num;
-		nsc.status = 0;
-		spin_lock(&net_status_lock);
-		list_add(&nsc.list, &net_status_list);
-		spin_unlock(&net_status_lock);
-
-		init_waitqueue_entry(&sleep, current);
-		spin_lock(&net->sock_lock);
-		if (!net->sock) {
-			spin_unlock(&net->sock_lock);
-			netprintk0("caller wanted status return but socket went away!\n");
-			kfree(msg);
-			return -EINVAL;
-		}
-		add_wait_queue(net->sock->sk->sk_sleep, &sleep);
-		spin_unlock(&net->sock_lock); 
-	}
-
-	/* finally, convert the message header to network byte-order and send */
-	net_msg_to_net(msg);
-	ret = net_send_tcp_msg(inode, NULL, msg, packet_len);
-
-	if (status) {
-		if (ret >= 0) {
-			/* wait on other node's handler */
-			tmpret = util_wait_atomic_eq(&nsc.wq, &nsc.woken, 1, 0);
-			if (tmpret==0) {
-				*status = nsc.status;
-				netprintk("status return requested, status is %d\n", *status);
-			} else {
-				ret = tmpret;
-				net_abort_status_return(&nsc);
-				netprintk0("net_abort_status_return called\n");
-				netprintk("status return requested, and error occurred while waiting=%d\n", ret);
-				*status = ret;
-			}
-			remove_wait_queue(recv_sock->sk->sk_sleep, &sleep);
-		} else {
-			netprintk("status return requested, and error returned from net_send_tcp_msg=%d\n", ret);
-			/* return bad status right away */
-			*status = ret;
-		}
-	} else if (ret < 0) {
-		netprintk("no status return requested, but error returned from net_send_tcp_msg=%d\n", ret);
-	}
-	
-done:
-	if (handler)
-		net_put_handler(handler);
-	if (msg)
-		kfree(msg);
-	return ret;
-}
-EXPORT_SYMBOL(net_send_message);
-
-int net_send_message_arr(u32 msg_type, u32 key, int arrlen, net_data *arr, u32 len, struct inode *inode, int *status)
-{
-	int ret = 0, tmpret, i;
-	net_msg *msg = NULL;
-	net_msg_handler *handler = NULL;
-	u32 packet_len;
-	net_status_ctxt nsc;
-	wait_queue_t sleep;
-	nm_node_inode_private *priv = NULL;
-	net_inode_private *net = NULL;
-	char *src, *dest;
-
-	if (!inode || !inode->u.generic_ip) {
-		netprintk0("bad inode, cannot send message\n");
-		return -EINVAL;
-	}
-	if (arrlen <= 0) {
-		netprintk0("bad data array length\n");
-		return -EINVAL;
-	}
 	priv = (nm_node_inode_private *)inode->u.generic_ip;
 	net = &priv->net;
 	spin_lock(&net->sock_lock); 
@@ -1037,54 +897,55 @@
 		ret = net_init_tcp_sock(inode);
 		if (!(ret == 0 || ret == -EEXIST)) {
 			netprintk0("failed to create socket!");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto done;
 		}
 		spin_lock(&net->sock_lock); 
 	}
 	spin_unlock(&net->sock_lock); 
 	
 	handler = net_lookup_handler(msg_type, key);
-	
 	if (!handler) {
 		netprintk("no such message type: %u/%u\n", msg_type, key);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto done;
 	}
 
-	if (!net_handler_msg_len_ok(handler, len)) {
+	for(i = 0; i < caller_iovlen; i++)
+		caller_bytes += caller_iov[i].iov_len;
+
+	if (!net_handler_msg_len_ok(handler, caller_bytes)) {
 		netprintk("len for message type %u incorrect: %u, should be %u\n", 
-		       msg_type, len, handler->max_len);
+		       msg_type, caller_bytes, handler->max_len);
 		ret = -EINVAL;
 		goto done;
 	}
-	packet_len = len + sizeof(net_msg);
-	msg = kmalloc(packet_len, GFP_KERNEL);
+
+	/* build up our iovec */
+	iovlen = caller_iovlen + 1;
+	iov = kmalloc(sizeof(struct iovec) * iovlen, GFP_KERNEL);
+	if (iov == NULL) {
+		netprintk("failed to %zu element iovec!\n", iovlen);
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	msg = kmalloc(sizeof(net_msg), GFP_KERNEL);
 	if (!msg) {
-		netprintk("failed to allocate %u bytes for message!\n", packet_len);
+		netprintk("failed to allocate a net_msg!\n");
 		ret = -ENOMEM;
 		goto done;
 	}
-	memset(msg, 0, packet_len);
+	memset(msg, 0, sizeof(net_msg));
 	msg->magic = NET_MSG_MAGIC;
-	msg->data_len = len;
+	msg->data_len = caller_bytes;
 	msg->msg_type = msg_type;
 	msg->key = key;
 	msg->msg_num = net_next_msg_num();
 
-	if (len > 0) {
-		int tmplen = len;
-		dest = &(msg->buf[0]);
-		for (i=0; i<arrlen; i++) {
-			src = arr[i].ptr;
-			if (arr[i].bytes > tmplen) {
-				netprintk0("data array is too large!\n");
-				kfree(msg);
-				return -EINVAL;
-			}
-			memcpy(dest, src, arr[i].bytes);
-			tmplen -= arr[i].bytes;
-			dest += arr[i].bytes;
-		}
-	}
+	iov[0].iov_len = sizeof(net_msg);
+	iov[0].iov_base = msg;
+	memcpy(&iov[1], caller_iov, caller_iovlen * sizeof(struct iovec));
 
 	/* does the caller want to wait for a simple status? */
 	if (status) {
@@ -1113,7 +974,8 @@
 
 	/* finally, convert the message header to network byte-order and send */
 	net_msg_to_net(msg);
-	ret = net_send_tcp_msg(inode, NULL, msg, packet_len);
+	ret = net_send_tcp_msg(inode, NULL, iov, iovlen,
+			       sizeof(net_msg) + caller_bytes);
 
 	if (status) {
 		if (ret >= 0) {
@@ -1142,16 +1004,65 @@
 done:
 	if (handler)
 		net_put_handler(handler);
+	if (iov)
+		kfree(iov);
 	if (msg)
 		kfree(msg);
 	return ret;
 }
-EXPORT_SYMBOL(net_send_message_arr);
+EXPORT_SYMBOL(net_send_message_iov);
 
+/*
+ * net_send_message
+ *
+ *   - this is probably the function you are looking for
+ *   - it will package up the message for you, verifying that
+ *       the message handler is there and the length is ok,
+ *       connect to the other node if there is not already a
+ *       socket for it, and optionally wait on a status return
+ *       from the other node 
+ *   - all you need prior to this call is to have inited the
+ *       net stuff, to have a valid inode for the node to contact 
+ *       in nm, and to have registered the message handler
+ *   - if status was requested, it will be returned to the caller
+ *       already converted to host byteorder
+ */
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
+		     struct inode *inode, int *status)
+{
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = len,
+	};
+	return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
+}
+EXPORT_SYMBOL(net_send_message);
 
 
+static int net_send_status_magic(struct inode *inode, struct socket *sock,
+			         net_msg *hdr, int err)
+{
+	struct iovec iov = {
+		.iov_base = hdr,
+		.iov_len = sizeof(net_msg),
+	};
+	       
+#ifdef BIG_NET_MSG
+	{
+		u16 n = hdr->src_node;
+		hdr->src_node = hdr->dst_node;
+		hdr->dst_node = n;
+	}
+#endif
+	hdr->status = err;
+	hdr->magic = NET_MSG_STATUS_MAGIC;  // twiddle the magic
+	hdr->data_len = 0;
+	netprintk("about to send status %d\n", err);
 
-
+	/* hdr has been in host byteorder this whole time */
+	net_msg_to_net(hdr);
+	return net_send_tcp_msg(inode, sock, &iov, 1, sizeof(net_msg));
+}
 /*
  * net_receive: receive from and dispatch all sockets with data pending
  */
@@ -1247,19 +1158,7 @@
 
 		/* if node has requested status return, do it now */
 		if (hdr.status) {
-#ifdef BIG_NET_MSG
-			u16 n = hdr.src_node;
-			hdr.src_node = hdr.dst_node;
-			hdr.dst_node = n;
-#endif
-			hdr.status = err;
-			hdr.magic = NET_MSG_STATUS_MAGIC;  // twiddle the magic
-			hdr.data_len = 0;
-			netprintk("about to send status %d\n", err);
-
-			/* hdr has been in host byteorder this whole time */
-			net_msg_to_net(&hdr);
-			tmperr = net_send_tcp_msg(inode, sock, &hdr, sizeof(net_msg));
+			tmperr = net_send_status_magic(inode, sock, &hdr, err);
 			netprintk0("yay, sent!\n");
 		} else if (err < 0) {
 			netprintk("dispatch (%u/%u) returned %d\n",

Modified: trunk/cluster/tcp.h
===================================================================
--- trunk/cluster/tcp.h	2005-01-13 20:01:47 UTC (rev 1766)
+++ trunk/cluster/tcp.h	2005-01-13 21:20:28 UTC (rev 1767)
@@ -233,20 +233,14 @@
 	NET_DRIVER_READY,
 };
 
-typedef struct _net_data
-{
-	int bytes;
-	void *ptr;
-} net_data;
-
 int net_register_handler(u32 msg_type, u32 key, int flags, 
 			 u32 max_len, net_msg_handler_func *func, void *data, void *buf);
 int net_recv_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 *packet_len);
-int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len);
 int net_send_error(struct socket *sock, u32 err_type);
 int net_init_tcp_sock(struct inode *inode);
 int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
-int net_send_message_arr(u32 msg_type, u32 key, int arrlen, net_data *arr, u32 len, struct inode *inode, int *status);
+int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
+			 size_t iovlen, struct inode *inode, int *status);
 int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
 net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
 



More information about the Ocfs2-commits mailing list