[Ocfs2-tools-devel] [PATCH] ocfs2_controld, debugfs.ocfs2: Dump the daemon debug buffer via debugfs.ocfs2.

Joel Becker Joel.Becker at ORACLE.COM
Thu Aug 14 12:27:48 PDT 2008


ocfs2_controld maintains a circular buffer of debug messages.  Until
now, the only way to see it was to run in the foreground via the -D
switch.  That's annoying to have to set up, and not useful in a
production environment.

We introduce the DUMP message to the controld protocol.  This message
requests the entire debug buffer.  ocfs2_controld sends the buffer as a
LIST reply.  Each ITEM in the list is one OCFS2_CONTROLD_MAXLINE packet.
Thus, the ITEMCOUNT is (roughly) "size of dump buffer /
OCFS2_CONTROLD_MAXLINE".

libo2cb gains the o2cb_control_daemon_debug() API.  This function sends
the DUMP message, receives the list, and returns a single string with
the entire buffer concatenated back together.

debugfs.ocfs2 gains the "controld" command.  Currently it only has one
sub-operation, "dump".  This calls o2cb_control_daemon_debug(), then
prints the results via the pager.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 debugfs.ocfs2/commands.c         |   44 ++++++++++++-
 debugfs.ocfs2/main.c             |    5 +-
 include/o2cb/o2cb.h              |    2 +
 include/o2cb/o2cb_client_proto.h |    1 +
 libo2cb/client_proto.c           |    1 +
 libo2cb/o2cb_abi.c               |   77 +++++++++++++++++++++
 ocfs2_controld/main.c            |  136 +++++++++++++++++++++----------------
 7 files changed, 205 insertions(+), 61 deletions(-)

diff --git a/debugfs.ocfs2/commands.c b/debugfs.ocfs2/commands.c
index 0f67b99..83cb5b6 100644
--- a/debugfs.ocfs2/commands.c
+++ b/debugfs.ocfs2/commands.c
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * commands.c
  *
  * handles debugfs commands
@@ -70,6 +72,7 @@ static void do_fs_locks (char **args);
 static void do_bmap (char **args);
 static void do_icheck (char **args);
 static void do_dlm_locks (char **args);
+static void do_controld(char **args);
 
 dbgfs_gbls gbls;
 
@@ -79,6 +82,7 @@ static Command commands[] = {
 	{ "cd",		do_cd },
 	{ "chroot",	do_chroot },
 	{ "close",	do_close },
+	{ "controld",   do_controld },
 	{ "curdev",	do_curdev },
 	{ "dlm_locks",	do_dlm_locks },
 	{ "dump",	do_dump },
@@ -824,6 +828,7 @@ static void do_help (char **args)
 	printf ("cd <filespec>\t\t\t\tChange directory\n");
 	printf ("chroot <filespec>\t\t\tChange root\n");
 	printf ("close\t\t\t\t\tClose a device\n");
+	printf ("controld dump\t\t\tObtain information from ocfs2_controld\n");
 	printf ("curdev\t\t\t\t\tShow current device\n");
 	printf ("decode <lockname#> ...\t\t\tDecode block#(s) from the lockname(s)\n");
 	printf ("dlm_locks [-l] lockname\t\t\tShow live dlm locking state\n");
@@ -891,6 +896,43 @@ static void do_lcd (char **args)
 }
 
 /*
+ * do_controld_dump()
+ * Fetch the ocfs2_controld debug buffer and print it through the pager.
+ */
+static void do_controld_dump(char **args)
+{
+	FILE *out;
+	errcode_t ret;
+	char *debug_buffer;
+
+	ret = o2cb_control_daemon_debug(&debug_buffer);
+	if (ret) {
+		com_err(args[0], ret, "while obtaining the debug buffer");
+		return;
+	}
+
+	out = open_pager(gbls.interactive);
+	fprintf(out, "%s", debug_buffer);
+	close_pager(out);
+	free(debug_buffer);	/* the string handed back above is ours to release */
+}
+
+/*
+ * do_controld()
+ * Dispatch on the sub-operation in args[1]; "dump" is the only one handled.
+ */
+static void do_controld(char **args)
+{
+	if (!args[1])
+		fprintf(stderr, "%s: Operation required\n", args[0]);
+	else if (!strcmp(args[1], "dump"))
+		do_controld_dump(args);
+	else
+		fprintf(stderr, "%s: Invalid operation: \"%s\"\n",
+			args[0], args[1]);
+}
+
+/*
  * do_curdev()
  *
  */
diff --git a/debugfs.ocfs2/main.c b/debugfs.ocfs2/main.c
index 3588726..722b339 100644
--- a/debugfs.ocfs2/main.c
+++ b/debugfs.ocfs2/main.c
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * main.c
  *
  * entry point for debugfs.ocfs2
@@ -430,6 +432,7 @@ int main (int argc, char **argv)
 	dbgfs_opts opts;
 	FILE *cmd = NULL;
 
+	initialize_o2cb_error_table();
 	initialize_ocfs_error_table();
 
 #define INSTALL_SIGNAL(sig)					\
diff --git a/include/o2cb/o2cb.h b/include/o2cb/o2cb.h
index d987d20..688817a 100644
--- a/include/o2cb/o2cb.h
+++ b/include/o2cb/o2cb.h
@@ -68,6 +68,8 @@ void o2cb_free_cluster_list(char **clusters);
 errcode_t o2cb_list_nodes(char *cluster_name, char ***nodes);
 void o2cb_free_nodes_list(char **nodes);
 
+errcode_t o2cb_control_daemon_debug(char **debug);
+
 struct o2cb_cluster_desc {
 	char *c_stack;		/* The cluster stack, NULL for classic */
 	char *c_cluster;	/* The name of the cluster, NULL for the
diff --git a/include/o2cb/o2cb_client_proto.h b/include/o2cb/o2cb_client_proto.h
index b459223..99e379e 100644
--- a/include/o2cb/o2cb_client_proto.h
+++ b/include/o2cb/o2cb_client_proto.h
@@ -42,6 +42,7 @@ typedef enum {
 	CM_LISTCLUSTERS,
 	CM_ITEMCOUNT,
 	CM_ITEM,
+	CM_DUMP,
 } client_message;
 
 int client_listen(const char *path);
diff --git a/libo2cb/client_proto.c b/libo2cb/client_proto.c
index 0811010..0ebb934 100644
--- a/libo2cb/client_proto.c
+++ b/libo2cb/client_proto.c
@@ -45,6 +45,7 @@ DEFINE_MESSAGE(LISTMOUNTS, 2, "%s %s")
 DEFINE_MESSAGE(LISTCLUSTERS, 0, "")
 DEFINE_MESSAGE(ITEMCOUNT, 1, "%u")
 DEFINE_MESSAGE(ITEM, 1, "%s")
+DEFINE_MESSAGE(DUMP, 0, "")
 END_MESSAGES(message_list)
 
 const char *message_to_string(client_message message)
diff --git a/libo2cb/o2cb_abi.c b/libo2cb/o2cb_abi.c
index f7f0148..7ebfa72 100644
--- a/libo2cb/o2cb_abi.c
+++ b/libo2cb/o2cb_abi.c
@@ -1921,6 +1921,83 @@ void o2cb_free_nodes_list(char **nodes)
 	o2cb_free_dir_list(nodes);
 }
 
+static errcode_t dump_list_to_string(char **dump_list, char **debug)
+{
+	int i;
+	size_t len, count = 0;
+	char *ptr;
+
+	for (i = 0; dump_list[i]; i++)	/* the list ends at its first NULL entry */
+		count += strlen(dump_list[i]);
+	/* count is the total text length; one more byte holds the '\0'. */
+	*debug = malloc(sizeof(char) * (count + 1));
+	if (!*debug)
+		return O2CB_ET_NO_MEMORY;
+	/* Terminate first, then copy the items back to back into the buffer. */
+	ptr = *debug;
+	ptr[count] = '\0';
+	for (i = 0; dump_list[i]; i++) {
+		len = strlen(dump_list[i]);
+		memcpy(ptr, dump_list[i], len);
+		ptr += len;
+	}
+
+	return 0;
+}
+
+errcode_t o2cb_control_daemon_debug(char **debug)
+{
+	errcode_t err = O2CB_ET_SERVICE_UNAVAILABLE;
+	int rc, fd = -1;
+	char buf[OCFS2_CONTROLD_MAXLINE];
+	char **dump_list = NULL;
+
+	rc = ocfs2_client_connect();
+	if (rc < 0) {
+		/* fprintf(stderr, "Unable to connect to ocfs2_controld: %s\n",
+			strerror(-rc)); */
+		switch (rc) {
+			case -EACCES:
+			case -EPERM:
+				err = O2CB_ET_PERMISSION_DENIED;
+				break;
+
+			default:
+				err = O2CB_ET_SERVICE_UNAVAILABLE;
+				break;
+		}
+		goto out;
+	}
+	fd = rc;	/* a non-negative return is the descriptor closed at out: */
+	/* Send DUMP, then read the reply below as a list of strings. */
+	rc = send_message(fd, CM_DUMP);
+	if (rc) {
+		/* fprintf(stderr,
+			"Unable to send DUMP message: %s\n",
+			strerror(-rc)); */
+		err = O2CB_ET_IO;
+		goto out;
+	}
+
+	rc = receive_list(fd, buf, &dump_list);
+	if (rc) {
+		/* fprintf(stderr, "Error reading from daemon: %s\n",
+			strerror(-rc)); */
+		err = O2CB_ET_IO;
+		goto out;
+	}
+	/* Join the items into one malloc()ed string in *debug, then drop the list. */
+	err = dump_list_to_string(dump_list, debug);
+	o2cb_free_dir_list(dump_list);
+
+out:
+	if (fd != -1)
+		close(fd);
+
+	return err;
+}
+
+
 errcode_t o2cb_get_hb_thread_pid (const char *cluster_name, const char *region_name,
 			   pid_t *pid)
 {
diff --git a/ocfs2_controld/main.c b/ocfs2_controld/main.c
index c16771e..ecff86f 100644
--- a/ocfs2_controld/main.c
+++ b/ocfs2_controld/main.c
@@ -245,44 +245,6 @@ out:
 	return rc;
 }
 
-int do_read(int fd, void *buf, size_t count)
-{
-	int rv, off = 0;
-
-	while (off < count) {
-		rv = read(fd, buf + off, count - off);
-		if (rv == 0)
-			return -1;
-		if (rv == -1 && errno == EINTR)
-			continue;
-		if (rv == -1)
-			return -1;
-		off += rv;
-	}
-	return 0;
-}
-
-int do_write(int fd, void *buf, size_t count)
-{
-	int rv, off = 0;
-
- retry:
-	rv = write(fd, buf + off, count);
-	if (rv == -1 && errno == EINTR)
-		goto retry;
-	if (rv < 0) {
-		log_error("write errno %d", errno);
-		return rv;
-	}
-
-	if (rv != count) {
-		count -= rv;
-		off += rv;
-		goto retry;
-	}
-	return 0;
-}
-
 static int do_mount(int ci, int fd, const char *fstype, const char *uuid,
 		    const char *cluster, const char *device,
 		    const char *service)
@@ -406,18 +368,84 @@ int connection_add(int fd, void (*work)(int ci), void (*dead)(int ci))
 	return -ELOOP;
 }
 
-static int dump_debug(int ci)
+/* Buffer bytes per ITEM: 4 characters for "ITEM", 1 for the space, 1 for the '\0' */
+#define DEBUG_BYTES_PER_ITEM	(OCFS2_CONTROLD_MAXLINE - 6)
+static size_t debug_bytes_to_count(size_t bytes)
 {
-	int len = DUMP_SIZE;
+	return (bytes + DEBUG_BYTES_PER_ITEM - 1) / DEBUG_BYTES_PER_ITEM;	/* round up */
+}
+
+static int send_debug(int fd, const char *ptr, size_t bytes)
+{
+	int rc = 0;
+	size_t remain = bytes;
+	size_t itemlen = DEBUG_BYTES_PER_ITEM;
+	char itembuf[DEBUG_BYTES_PER_ITEM + 1];	/* +1 for the '\0' added below */
+
+	while (remain) {
+		if (itemlen > remain)	/* last chunk may be shorter */
+			itemlen = remain;
+		memcpy(itembuf, ptr, itemlen);
+		itembuf[itemlen] = '\0';
+		rc = send_message(fd, CM_ITEM, itembuf);
+		if (rc)
+			break;
+		/* Step past the chunk that was just sent. */
+		ptr += itemlen;
+		remain -= itemlen;
+	}
+
+	return rc;	/* 0, or the first non-zero send_message() result */
+}
+
+static int dump_debug(int ci, int fd)
+{
+	int rc, rctmp;
+	char error_msg[100];  /* Arbitrary size smaller than a message */
+	int count = 0;
+
+	if (dump_wrap)
+		count += debug_bytes_to_count(DUMP_SIZE - dump_point);
+	count += debug_bytes_to_count(dump_point);
+	/* Reply is ITEMCOUNT, then the ITEMs, then a STATUS with -rc and error_msg. */
+	rc = send_message(fd, CM_ITEMCOUNT, count);
+	if (rc) {
+		snprintf(error_msg, sizeof(error_msg),
+			 "Unable to send ITEMCOUNT: %s",
+			 strerror(-rc));
+		goto out_status;
+	}
 
 	if (dump_wrap) {
-		len = DUMP_SIZE - dump_point;
-		do_write(client[ci].fd, dump_buf + dump_point, len);
-		len = dump_point;
+		rc = send_debug(fd, dump_buf + dump_point,
+				DUMP_SIZE - dump_point);
+		if (rc) {
+			snprintf(error_msg, sizeof(error_msg),
+				 "Unable to send ITEM: %s",
+				 strerror(-rc));
+			goto out_status;
+		}
+	}
+	rc = send_debug(fd, dump_buf, dump_point);
+	if (rc) {
+		snprintf(error_msg, sizeof(error_msg),
+			 "Unable to send ITEM: %s",
+			 strerror(-rc));
+		goto out_status;
 	}
 
-	do_write(client[ci].fd, dump_buf, len);
-	return 0;
+	strcpy(error_msg, "OK");
+
+out_status:
+	rctmp = send_message(fd, CM_STATUS, -rc, error_msg);
+	if (rctmp) {
+		log_error("Error sending STATUS message: %s",
+			  strerror(-rctmp));	/* the STATUS failure, not the earlier rc */
+		if (!rc)
+			rc = rctmp;
+	}
+
+	return rc;
 }
 
 static int send_filesystems(int ci, int fd, const char *fstype,
@@ -540,6 +568,10 @@ static void process_client(int ci)
 		rv = send_filesystems(ci, fd, argv[0], argv[1]);
 		break;
 
+		case CM_DUMP:
+		rv = dump_debug(ci, fd);
+		break;
+
 		case CM_STATUS:
 		log_error("Someone sent us cm_status!");
 		break;
@@ -548,20 +580,6 @@ static void process_client(int ci)
 		log_error("Invalid message received");
 		break;
 	}
-#if 0
-	if (daemon_debug_opt)
-		dump_state();
-#endif
-
-#if 0
-	} else if (!strcmp(cmd, "dump")) {
-		dump_debug(ci);
-
-	} else {
-		rv = -EINVAL;
-		goto reply;
-	}
-#endif
 
 	return;
 }
-- 
1.5.6.3


-- 

"Glory is fleeting, but obscurity is forever."  
         - Napoleon Bonaparte

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127



More information about the Ocfs2-tools-devel mailing list