[rds-commits] zab commits r97 - trunk/linux/net/rds
svn-commits@oss.oracle.com
svn-commits at oss.oracle.com
Thu Jun 1 19:39:27 CDT 2006
Author: zab
Date: 2006-06-01 19:39:24 -0500 (Thu, 01 Jun 2006)
New Revision: 97
Added:
trunk/linux/net/rds/stats.c
Modified:
trunk/linux/net/rds/Makefile
trunk/linux/net/rds/ack.c
trunk/linux/net/rds/af_rds.c
trunk/linux/net/rds/connection.c
trunk/linux/net/rds/flow.c
trunk/linux/net/rds/rds.h
trunk/linux/net/rds/recv.c
trunk/linux/net/rds/send.c
Log:
Add simple global stat gathering and reporting.
Modified: trunk/linux/net/rds/Makefile
===================================================================
--- trunk/linux/net/rds/Makefile 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/Makefile 2006-06-02 00:39:24 UTC (rev 97)
@@ -1,6 +1,6 @@
obj-$(CONFIG_RDS) += rds.o
rds-y := af_rds.o ack.o bind.o connection.o flow.o message.o \
- recv.o send.o sysctl.o transport.o \
+ recv.o send.o stats.o sysctl.o transport.o \
tcp.o tcp_connect.o tcp_listen.o tcp_send.o tcp_recv.o \
loop.o
Modified: trunk/linux/net/rds/ack.c
===================================================================
--- trunk/linux/net/rds/ack.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/ack.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -86,9 +86,11 @@
/* if we found it then we just update the current entry */
if (low <= high) {
ent->e_seq = seq;
+ rds_stats_inc(s_ack_entry_hit);
goto out;
}
+ rds_stats_inc(s_ack_entry_miss);
memmove(ent, ent + 1, (nr - i) * sizeof(struct rds_ack_entry));
*ent = key;
rm->m_len += sizeof(struct rds_ack_entry);
@@ -137,6 +139,7 @@
my_rm = rds_message_alloc_ack(gfp, km);
if (my_rm == NULL) {
/* XXX tunable retry? */
+ rds_stats_inc(s_ack_alloc_fail);
conn->c_trans->send_trigger(conn);
goto out;
}
@@ -159,6 +162,7 @@
inc->i_hdr.h_sequence, km);
if (rm->m_len == RDS_ACKS_PER_FRAG) {
+ rds_stats_inc(s_ack_message_full);
rds_ack_send(conn, km);
fastpending = 0;
} else if (inc->i_hdr.h_flags & RDS_HEAD_FLAG_FASTACK)
@@ -169,9 +173,15 @@
}
- if (conn->c_ack_rm &&
- (fastpending || time_after_eq(jiffies, conn->c_ack_deadline)))
- rds_ack_send(conn, km);
+ if (conn->c_ack_rm) {
+ if (fastpending) {
+ rds_stats_inc(s_ack_message_fast);
+ rds_ack_send(conn, km);
+ } else if (time_after_eq(jiffies, conn->c_ack_deadline)) {
+ rds_stats_inc(s_ack_message_deadline);
+ rds_ack_send(conn, km);
+ }
+ }
spin_unlock_irqrestore(&conn->c_lock, flags);
out:
@@ -218,6 +228,8 @@
u16 nr;
pr_debug("got an ack\n");
+ rds_stats_inc(s_ack_message_received);
+
nr = be16_to_cpu(inc->i_hdr.h_len) / sizeof(struct rds_ack_entry);
if (nr)
conn->c_trans->inc_process_acks(conn, inc, nr);
Modified: trunk/linux/net/rds/af_rds.c
===================================================================
--- trunk/linux/net/rds/af_rds.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/af_rds.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -206,6 +206,27 @@
return ret;
}
+static int rds_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ int ret = -ENOPROTOOPT;
+
+ if (level != SOL_RDS)
+ goto out;
+
+ switch(optname) {
+ case RDS_STATS_GLOBAL:
+ ret = rds_stats_get(optname, optval, optlen);
+ break;
+ default:
+ break;
+ }
+
+out:
+ return ret;
+
+}
+
#ifdef KERNEL_HAS_PROTO_REGISTER
static struct proto rds_proto = {
.name = "RDS",
@@ -228,7 +249,7 @@
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = rds_setsockopt,
- .getsockopt = sock_no_getsockopt,
+ .getsockopt = rds_getsockopt,
.sendmsg = rds_sendmsg,
.recvmsg = rds_recvmsg,
.mmap = sock_no_mmap,
Modified: trunk/linux/net/rds/connection.c
===================================================================
--- trunk/linux/net/rds/connection.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/connection.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -26,7 +26,6 @@
#include "rds.h"
#include "loop.h"
-/* XXX export some stats for this */
#define RDS_CONNECTION_HASH_BITS 12
#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
#define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1)
@@ -84,6 +83,8 @@
{
unsigned long flags;
+ rds_stats_inc(s_conn_reset);
+
spin_lock_irqsave(&conn->c_lock, flags);
rds_send_reset_vec(conn);
Modified: trunk/linux/net/rds/flow.c
===================================================================
--- trunk/linux/net/rds/flow.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/flow.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -24,7 +24,6 @@
#include "rds.h"
-/* XXX export some stats for this */
#define RDS_FLOW_HASH_BITS 12
#define RDS_FLOW_HASH_ENTRIES (1 << RDS_FLOW_HASH_BITS)
#define RDS_FLOW_HASH_MASK (RDS_FLOW_HASH_ENTRIES - 1)
Modified: trunk/linux/net/rds/rds.h
===================================================================
--- trunk/linux/net/rds/rds.h 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/rds.h 2006-06-02 00:39:24 UTC (rev 97)
@@ -19,6 +19,7 @@
#define RDS_CANCEL_SENT_TO 1
#define RDS_SNDBUF 2
+#define RDS_STATS_GLOBAL 3
#define RDS_FRAG_SHIFT 12
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
@@ -198,6 +199,25 @@
}
#endif /* KERNEL_HAS_PROTO_REGISTER */
+struct rds_statistics {
+ unsigned long s_ack_entry_hit;
+ unsigned long s_ack_entry_miss;
+ unsigned long s_ack_message_full;
+ unsigned long s_ack_message_fast;
+ unsigned long s_ack_message_deadline;
+ unsigned long s_ack_message_received;
+ unsigned long s_ack_alloc_fail;
+ unsigned long s_conn_reset;
+ unsigned long s_recv_drop_old_seq;
+ unsigned long s_recv_drop_orphan_frag;
+ unsigned long s_recv_drop_no_sock;
+ unsigned long s_recv_drop_dead_sock;
+ unsigned long s_recv_deliver_raced;
+ unsigned long s_recv_delivered;
+ unsigned long s_recv_queued;
+ unsigned long s_send_queue_full;
+};
+
/* ack.c */
void rds_ack_build(struct rds_connection *conn, gfp_t gfp, enum km_type km);
void rds_ack_timer(unsigned long data);
@@ -268,6 +288,14 @@
void rds_send_put_next_message(struct rds_connection *conn,
struct rds_message *rm, int complete);
+/* stats.c */
+DECLARE_PER_CPU(struct rds_statistics, rds_stats);
+#define rds_stats_inc(member) do { \
+ per_cpu(rds_stats, get_cpu()).member++; \
+ put_cpu(); \
+} while (0)
+int rds_stats_get(int optname, char __user *optval, int __user *optlen);
+
/* sysctl.c */
int __init rds_sysctl_init(void);
void __exit rds_sysctl_exit(void);
Modified: trunk/linux/net/rds/recv.c
===================================================================
--- trunk/linux/net/rds/recv.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/recv.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -115,6 +115,7 @@
conn->c_next_rx_seq);
if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq) {
ack = 1;
+ rds_stats_inc(s_recv_drop_old_seq);
goto out;
}
conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
@@ -127,6 +128,7 @@
*/
if (is_frag && !conn->c_rx_inc) {
ack = 1;
+ rds_stats_inc(s_recv_drop_orphan_frag);
goto out;
}
@@ -145,6 +147,7 @@
rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
if (rs == NULL) {
ack = 1;
+ rds_stats_inc(s_recv_drop_no_sock);
goto out;
}
rds_inc_addref(inc);
@@ -167,9 +170,11 @@
write_lock_irqsave(&sk->sk_callback_lock, flags);
if (!sock_flag(sk, SOCK_DEAD)) {
pr_debug("adding inc %p to rs %p's recv queue\n", inc, rs);
+ rds_stats_inc(s_recv_queued);
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
wake_up(sk->sk_sleep);
} else {
+ rds_stats_inc(s_recv_drop_dead_sock);
rds_inc_put(conn->c_rx_inc);
ack = 1;
}
@@ -281,10 +286,12 @@
if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
rds_inc_put(inc);
inc = NULL;
- /* XXX inc some stat */;
+ rds_stats_inc(s_recv_deliver_raced);
continue;
}
+ rds_stats_inc(s_recv_delivered);
+
sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
Modified: trunk/linux/net/rds/send.c
===================================================================
--- trunk/linux/net/rds/send.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/send.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -345,6 +345,7 @@
while (!rds_send_queue_rm(flow, rm, nr_frags, rs->rs_bound_port,
usin->sin_port, &queued)) {
+ rds_stats_inc(s_send_queue_full);
/* XXX make sure this is reasonable */
if (payload_len > rs->rs_sndbuf) {
ret = -EMSGSIZE;
Added: trunk/linux/net/rds/stats.c
===================================================================
--- trunk/linux/net/rds/stats.c 2006-06-01 21:47:13 UTC (rev 96)
+++ trunk/linux/net/rds/stats.c 2006-06-02 00:39:24 UTC (rev 97)
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2006 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+
+#include "rds.h"
+
+DEFINE_PER_CPU(struct rds_statistics, rds_stats) ____cacheline_aligned;
+
+/*
+ * The user of the stats interface sees RDS_ sockopts which export an array of
+ * u64s. The interface is made backwards and forwards compatible by only copying as
+ * much of the struct as the user asked for in optlen and returning the amount
+ * copied. New stats are only appended.
+ *
+ * Stats are stored as unsigned longs per-cpu to avoid overhead. The rare
+ * inconsistencies that come from a lack of locking are OK. For now the
+ * results will be surprising as the longs wrap and the u64 sums jump
+ * around. If someone notices we could fix that by summing as stats are
+ * incremented and get close to wrapping.
+ *
+ * Summing a cacheline at a time is a compromise between avoiding allocating an
+ * entire stats struct and avoiding the worst case of hitting each cpu for each
+ * stat.
+ */
+int rds_stats_get(int optname, char __user *optval, int __user *optlen)
+{
+ /* scary, but we're high in the stack */
+ u64 dest[L1_CACHE_BYTES / sizeof(u64)];
+ size_t i, cur, elements;
+ int len, ret, cpu, bytes_to_copy, vals_to_copy, copied;
+ unsigned long *src;
+
+ if (get_user(len, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (len < 0) {
+ ret = -ERANGE;
+ goto out;
+ }
+
+ switch(optname) {
+ case RDS_STATS_GLOBAL:
+ elements = sizeof(struct rds_statistics) /
+ sizeof(unsigned long);
+ break;
+ }
+
+ cur = 0;
+ copied = 0;
+
+ while (copied < len && cur < elements) {
+ /* first sum a block of stats from the cpus */
+ vals_to_copy = min_t(int, ARRAY_SIZE(dest), elements - cur);
+ memset(dest, 0, sizeof(dest));
+ for_each_online_cpu(cpu) {
+ src = (unsigned long *)&(per_cpu(rds_stats, cpu));
+ for (i = 0; i < vals_to_copy; i++)
+ dest[i] += src[cur + i];
+ }
+ cur += vals_to_copy;
+
+ /* then copy the sums to the user */
+ bytes_to_copy = min_t(int, len - copied,
+ vals_to_copy * sizeof(u64));
+ if (copy_to_user(optval + copied, dest, bytes_to_copy)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ copied += bytes_to_copy;
+ }
+
+ if (put_user(copied, optlen))
+ ret = -EFAULT;
+ else
+ ret = 0;
+out:
+ return ret;
+}
More information about the rds-commits
mailing list