[rds-commits] zab commits r97 - trunk/linux/net/rds

svn-commits@oss.oracle.com
Thu Jun 1 19:39:27 CDT 2006


Author: zab
Date: 2006-06-01 19:39:24 -0500 (Thu, 01 Jun 2006)
New Revision: 97

Added:
   trunk/linux/net/rds/stats.c
Modified:
   trunk/linux/net/rds/Makefile
   trunk/linux/net/rds/ack.c
   trunk/linux/net/rds/af_rds.c
   trunk/linux/net/rds/connection.c
   trunk/linux/net/rds/flow.c
   trunk/linux/net/rds/rds.h
   trunk/linux/net/rds/recv.c
   trunk/linux/net/rds/send.c
Log:
Add simple global statistics gathering and reporting via a new RDS_STATS_GLOBAL getsockopt.
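
For reference, a minimal userspace sketch of reading the new sockopt.  The
PF_RDS, SOL_RDS and socket-type values are defined outside this commit, so
the numbers below are assumptions (they match what mainline RDS later
settled on); the calling convention follows rds_stats_get() in stats.c
below:

#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>

#ifndef PF_RDS
#define PF_RDS			21	/* assumed; AF_RDS in later mainline */
#endif
#ifndef SOL_RDS
#define SOL_RDS			276	/* assumed; later mainline value */
#endif
#define RDS_STATS_GLOBAL	3	/* from rds.h in this revision */

int main(void)
{
	uint64_t stats[64];	/* roomy; the kernel copies at most optlen bytes */
	socklen_t optlen = sizeof(stats);
	unsigned int i;
	int fd;

	fd = socket(PF_RDS, SOCK_SEQPACKET, 0);	/* socket type is an assumption */
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (getsockopt(fd, SOL_RDS, RDS_STATS_GLOBAL, stats, &optlen) < 0) {
		perror("getsockopt");
		return 1;
	}

	/* the kernel sets optlen to the number of bytes it actually copied,
	 * so older binaries keep working as new counters are appended */
	for (i = 0; i < optlen / sizeof(uint64_t); i++)
		printf("stat[%u] = %llu\n", i, (unsigned long long)stats[i]);
	return 0;
}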


Modified: trunk/linux/net/rds/Makefile
===================================================================
--- trunk/linux/net/rds/Makefile	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/Makefile	2006-06-02 00:39:24 UTC (rev 97)
@@ -1,6 +1,6 @@
 obj-$(CONFIG_RDS) += rds.o
  
 rds-y :=	af_rds.o ack.o bind.o connection.o flow.o message.o	\
-			recv.o send.o sysctl.o transport.o		\
+			recv.o send.o stats.o sysctl.o transport.o	\
 		tcp.o tcp_connect.o tcp_listen.o tcp_send.o tcp_recv.o	\
 		loop.o

Modified: trunk/linux/net/rds/ack.c
===================================================================
--- trunk/linux/net/rds/ack.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/ack.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -86,9 +86,11 @@
 	/* if we found it then we just update the current entry */
 	if (low <= high) {
 		ent->e_seq = seq;
+		rds_stats_inc(s_ack_entry_hit);
 		goto out;
 	}
 
+	rds_stats_inc(s_ack_entry_miss);
 	memmove(ent, ent + 1, (nr - i) * sizeof(struct rds_ack_entry));
 	*ent = key;
 	rm->m_len += sizeof(struct rds_ack_entry);
@@ -137,6 +139,7 @@
 				my_rm = rds_message_alloc_ack(gfp, km);
 				if (my_rm == NULL) {
 					/* XXX tunable retry? */
+					rds_stats_inc(s_ack_alloc_fail);
 					conn->c_trans->send_trigger(conn);
 					goto out;
 				}
@@ -159,6 +162,7 @@
 				     inc->i_hdr.h_sequence, km);
 
 		if (rm->m_len == RDS_ACKS_PER_FRAG) {
+			rds_stats_inc(s_ack_message_full);
 			rds_ack_send(conn, km);
 			fastpending = 0;
 		} else if (inc->i_hdr.h_flags & RDS_HEAD_FLAG_FASTACK)
@@ -169,9 +173,15 @@
 
 	}
 
-	if (conn->c_ack_rm && 
-	    (fastpending || time_after_eq(jiffies, conn->c_ack_deadline)))
-		rds_ack_send(conn, km);
+	if (conn->c_ack_rm) {
+		if (fastpending) {
+			rds_stats_inc(s_ack_message_fast);
+			rds_ack_send(conn, km);
+		} else if (time_after_eq(jiffies, conn->c_ack_deadline)) {
+			rds_stats_inc(s_ack_message_deadline);
+			rds_ack_send(conn, km);
+		}
+	}
 
 	spin_unlock_irqrestore(&conn->c_lock, flags);
 out:
@@ -218,6 +228,8 @@
 	u16 nr;
 	pr_debug("got an ack\n");
 
+	rds_stats_inc(s_ack_message_received);
+
 	nr = be16_to_cpu(inc->i_hdr.h_len) / sizeof(struct rds_ack_entry);
 	if (nr)
 		conn->c_trans->inc_process_acks(conn, inc, nr);

Modified: trunk/linux/net/rds/af_rds.c
===================================================================
--- trunk/linux/net/rds/af_rds.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/af_rds.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -206,6 +206,27 @@
 	return ret;
 }
 
+static int rds_getsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int __user *optlen)
+{
+	int ret = -ENOPROTOOPT;
+
+	if (level != SOL_RDS)
+		goto out;
+
+	switch(optname) {
+		case RDS_STATS_GLOBAL:
+			ret = rds_stats_get(optname, optval, optlen);
+			break;
+		default:
+			break;
+	}
+
+out:
+	return ret;
+}
+
 #ifdef KERNEL_HAS_PROTO_REGISTER
 static struct proto rds_proto = {
 	.name	  = "RDS",
@@ -228,7 +249,7 @@
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	rds_setsockopt,
-	.getsockopt =	sock_no_getsockopt,
+	.getsockopt =	rds_getsockopt,
 	.sendmsg =	rds_sendmsg,
 	.recvmsg =	rds_recvmsg,
 	.mmap =		sock_no_mmap,

Modified: trunk/linux/net/rds/connection.c
===================================================================
--- trunk/linux/net/rds/connection.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/connection.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -26,7 +26,6 @@
 #include "rds.h"
 #include "loop.h"
 
-/* XXX export some stats for this */
 #define RDS_CONNECTION_HASH_BITS 12
 #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
 #define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1)
@@ -84,6 +83,8 @@
 {
 	unsigned long flags;
 
+	rds_stats_inc(s_conn_reset);
+
 	spin_lock_irqsave(&conn->c_lock, flags);
 
 	rds_send_reset_vec(conn);

Modified: trunk/linux/net/rds/flow.c
===================================================================
--- trunk/linux/net/rds/flow.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/flow.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -24,7 +24,6 @@
 
 #include "rds.h"
 
-/* XXX export some stats for this */
 #define RDS_FLOW_HASH_BITS 12
 #define RDS_FLOW_HASH_ENTRIES (1 << RDS_FLOW_HASH_BITS)
 #define RDS_FLOW_HASH_MASK (RDS_FLOW_HASH_ENTRIES - 1)

Modified: trunk/linux/net/rds/rds.h
===================================================================
--- trunk/linux/net/rds/rds.h	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/rds.h	2006-06-02 00:39:24 UTC (rev 97)
@@ -19,6 +19,7 @@
 
 #define RDS_CANCEL_SENT_TO	1
 #define RDS_SNDBUF		2
+#define RDS_STATS_GLOBAL	3
 
 #define RDS_FRAG_SHIFT	12
 #define RDS_FRAG_SIZE	((unsigned int)(1 << RDS_FRAG_SHIFT))
@@ -198,6 +199,25 @@
 }
 #endif /* KERNEL_HAS_PROTO_REGISTER */
 
+struct rds_statistics {
+	unsigned long	s_ack_entry_hit;
+	unsigned long	s_ack_entry_miss;
+	unsigned long	s_ack_message_full;
+	unsigned long	s_ack_message_fast;
+	unsigned long	s_ack_message_deadline;
+	unsigned long	s_ack_message_received;
+	unsigned long	s_ack_alloc_fail;
+	unsigned long	s_conn_reset;
+	unsigned long	s_recv_drop_old_seq;
+	unsigned long	s_recv_drop_orphan_frag;
+	unsigned long	s_recv_drop_no_sock;
+	unsigned long	s_recv_drop_dead_sock;
+	unsigned long	s_recv_deliver_raced;
+	unsigned long	s_recv_delivered;
+	unsigned long	s_recv_queued;
+	unsigned long	s_send_queue_full;
+};
+
 /* ack.c */
 void rds_ack_build(struct rds_connection *conn, gfp_t gfp, enum km_type km);
 void rds_ack_timer(unsigned long data);
@@ -268,6 +288,14 @@
 void rds_send_put_next_message(struct rds_connection *conn,
 			       struct rds_message *rm, int complete);
 
+/* stats.c */
+DECLARE_PER_CPU(struct rds_statistics, rds_stats);
+#define rds_stats_inc(member) do {			\
+	per_cpu(rds_stats, get_cpu()).member++;		\
+	put_cpu();					\
+} while (0)
+int rds_stats_get(int optname, char __user *optval, int __user *optlen);
+
 /* sysctl.c */
 int __init rds_sysctl_init(void);
 void __exit rds_sysctl_exit(void);
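
A note on the rds_stats_inc() macro above: get_cpu() disables preemption
while returning the current cpu id, so the increment always lands in the
local cpu's instance of rds_stats.  Roughly, a call like
rds_stats_inc(s_conn_reset) behaves like this sketch:

	{
		int cpu = get_cpu();	/* disables preemption, returns this cpu */
		per_cpu(rds_stats, cpu).s_conn_reset++;
		put_cpu();		/* re-enables preemption */
	}

No lock or atomic op is used, so an increment can still collide with one
from irq context on the same cpu and lose a count; the comment in stats.c
below accepts those rare losses.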

Modified: trunk/linux/net/rds/recv.c
===================================================================
--- trunk/linux/net/rds/recv.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/recv.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -115,6 +115,7 @@
 		 conn->c_next_rx_seq);
 	if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq) {
 		ack = 1;
+		rds_stats_inc(s_recv_drop_old_seq);
 		goto out;
 	}
 	conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
@@ -127,6 +128,7 @@
 	 */
 	if (is_frag && !conn->c_rx_inc) {
 		ack = 1;
+		rds_stats_inc(s_recv_drop_orphan_frag);
 		goto out;
 	}
 
@@ -145,6 +147,7 @@
 		rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
 		if (rs == NULL) {
 			ack = 1;
+			rds_stats_inc(s_recv_drop_no_sock);
 			goto out;
 		}
 		rds_inc_addref(inc);
@@ -167,9 +170,11 @@
 	write_lock_irqsave(&sk->sk_callback_lock, flags);
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		pr_debug("adding inc %p to rs %p's recv queue\n", inc, rs);
+		rds_stats_inc(s_recv_queued);
 		list_add_tail(&inc->i_item, &rs->rs_recv_queue);
 		wake_up(sk->sk_sleep);
 	} else {
+		rds_stats_inc(s_recv_drop_dead_sock);
 		rds_inc_put(conn->c_rx_inc);
 		ack = 1;
 	}
@@ -281,10 +286,12 @@
 		if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
 			rds_inc_put(inc);
 			inc = NULL;
-			/* XXX inc some stat */;
+			rds_stats_inc(s_recv_deliver_raced);
 			continue;
 		}
 
+		rds_stats_inc(s_recv_delivered);
+
 		sin = (struct sockaddr_in *)msg->msg_name;
 		if (sin) {
 			sin->sin_family = AF_INET;

Modified: trunk/linux/net/rds/send.c
===================================================================
--- trunk/linux/net/rds/send.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/send.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -345,6 +345,7 @@
 
 	while (!rds_send_queue_rm(flow, rm, nr_frags, rs->rs_bound_port, 
 				  usin->sin_port, &queued)) {
+		rds_stats_inc(s_send_queue_full);
 		/* XXX make sure this is reasonable */
 		if (payload_len > rs->rs_sndbuf) {
 			ret = -EMSGSIZE;

Added: trunk/linux/net/rds/stats.c
===================================================================
--- trunk/linux/net/rds/stats.c	2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/stats.c	2006-06-02 00:39:24 UTC (rev 97)
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2006 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+
+#include "rds.h"
+
+DEFINE_PER_CPU(struct rds_statistics, rds_stats) ____cacheline_aligned;
+
+/*
+ * The user of the stats interface sees RDS_ sockopts which export an array of
+ * u64s.  The interface is kept backward and forward compatible by copying
+ * only as much of the struct as the user asked for in optlen and returning
+ * the amount actually copied.  New stats are only ever appended.
+ *
+ * Stats are stored as unsigned longs per-cpu to avoid locking overhead.  The
+ * rare inconsistencies that come from the lack of locking are acceptable.
+ * For now the results will be surprising as the longs wrap and the u64 sums
+ * jump around.  If someone notices, we could fix that by folding each long
+ * into its u64 sum as it gets close to wrapping.
+ *
+ * Summing a cacheline at a time is a compromise between allocating an entire
+ * stats struct on the stack and the worst case of touching every cpu once
+ * for every stat.
+ */
+int rds_stats_get(int optname, char __user *optval, int __user *optlen)
+{
+	/* scary, but we're high in the stack */
+	u64 dest[L1_CACHE_BYTES / sizeof(u64)];
+	size_t i, cur, elements;
+	int len, ret, cpu, bytes_to_copy, vals_to_copy, copied;
+	unsigned long *src;
+
+	if (get_user(len, optlen)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (len < 0) {
+		ret = -ERANGE;
+		goto out;
+	}
+
+	switch(optname) {
+		case RDS_STATS_GLOBAL:
+			elements = sizeof(struct rds_statistics) /
+				   sizeof(unsigned long);
+			break;
+		default:
+			/* don't leave 'elements' and 'ret' uninitialized
+			 * if we're handed an optname we don't recognize */
+			ret = -ENOPROTOOPT;
+			goto out;
+	}
+
+	cur = 0;
+	copied = 0;
+
+	while (copied < len && cur < elements) {
+		/* first sum a block of stats from the cpus */
+		vals_to_copy = min_t(int, ARRAY_SIZE(dest), elements - cur);
+		memset(dest, 0, sizeof(dest));
+		for_each_online_cpu(cpu) {
+			src = (unsigned long *)&(per_cpu(rds_stats, cpu));
+			for (i = 0; i < vals_to_copy; i++)
+				dest[i] += src[cur + i];
+		}
+		cur += vals_to_copy;
+
+		/* then copy the sums to the user */
+		bytes_to_copy = min_t(int, len - copied, 
+				      vals_to_copy * sizeof(u64));
+		if (copy_to_user(optval + copied, dest, bytes_to_copy)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		copied += bytes_to_copy;
+	}
+
+	if (put_user(copied, optlen))
+		ret = -EFAULT;
+	else
+		ret = 0;
+out:
+	return ret;
+}
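
(For scale: with a 64-byte L1 cacheline, dest holds eight u64 sums, so the
sixteen counters in struct rds_statistics are gathered in two passes over
the online cpus; a 128-byte cacheline would do it in one.)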



