[Ocfs2-tools-devel] [PATCH] ocfs2_controld, debugfs.ocfs2: Dump the daemon debug buffer via debugfs.ocfs2.
Sunil Mushran
Sunil.Mushran at oracle.com
Thu Aug 14 13:39:31 PDT 2008
Very cool.
Joel Becker wrote:
> ocfs2_controld maintains a circular buffer of debug messages. Until
> now, the only way to see it was to run in the foreground via the -D
> switch. That's annoying to have to set up, and not useful in a
> production environment.
>
> We introduce the DUMP message to the controld protocol. This message
> requests the entire debug buffer. ocfs2_controld sends the buffer as a
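> LIST reply. Each ITEM in the list is one OCFS2_CONTROLD_MAXLINE packet
> carrying up to OCFS2_CONTROLD_MAXLINE - 6 bytes of the buffer (the rest
> of the packet holds the "ITEM " prefix and the terminating NUL).
> Thus, the ITEMCOUNT is (roughly) "size of dump buffer /
> OCFS2_CONTROLD_MAXLINE".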
>
> libo2cb gains the o2cb_control_daemon_debug() API. This function sends
> the DUMP message, receives the list, and returns a single string with
> the entire buffer concatenated back together.
>
> debugfs.ocfs2 gains the "controld" command. Currently it only has one
> sub-operation, "dump". This calls o2cb_control_daemon_debug(), then
> prints the results via the pager.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
> debugfs.ocfs2/commands.c | 44 ++++++++++++-
> debugfs.ocfs2/main.c | 5 +-
> include/o2cb/o2cb.h | 2 +
> include/o2cb/o2cb_client_proto.h | 1 +
> libo2cb/client_proto.c | 1 +
> libo2cb/o2cb_abi.c | 77 +++++++++++++++++++++
> ocfs2_controld/main.c | 136 +++++++++++++++++++++----------------
> 7 files changed, 205 insertions(+), 61 deletions(-)
>
> diff --git a/debugfs.ocfs2/commands.c b/debugfs.ocfs2/commands.c
> index 0f67b99..83cb5b6 100644
> --- a/debugfs.ocfs2/commands.c
> +++ b/debugfs.ocfs2/commands.c
> @@ -1,4 +1,6 @@
> -/*
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> * commands.c
> *
> * handles debugfs commands
> @@ -70,6 +72,7 @@ static void do_fs_locks (char **args);
> static void do_bmap (char **args);
> static void do_icheck (char **args);
> static void do_dlm_locks (char **args);
> +static void do_controld(char **args);
>
> dbgfs_gbls gbls;
>
> @@ -79,6 +82,7 @@ static Command commands[] = {
> { "cd", do_cd },
> { "chroot", do_chroot },
> { "close", do_close },
> + { "controld", do_controld },
> { "curdev", do_curdev },
> { "dlm_locks", do_dlm_locks },
> { "dump", do_dump },
> @@ -824,6 +828,7 @@ static void do_help (char **args)
> printf ("cd <filespec>\t\t\t\tChange directory\n");
> printf ("chroot <filespec>\t\t\tChange root\n");
> printf ("close\t\t\t\t\tClose a device\n");
> + printf ("controld dump\t\t\tObtain information from ocfs2_controld\n");
> printf ("curdev\t\t\t\t\tShow current device\n");
> printf ("decode <lockname#> ...\t\t\tDecode block#(s) from the lockname(s)\n");
> printf ("dlm_locks [-l] lockname\t\t\tShow live dlm locking state\n");
> @@ -891,6 +896,43 @@ static void do_lcd (char **args)
> }
>
> /*
> + * do_controld_dump()
> + *
> + */
> +static void do_controld_dump(char **args)
> +{
> + FILE *out;
> + errcode_t ret;
> + char *debug_buffer;
> +
> + ret = o2cb_control_daemon_debug(&debug_buffer);
> + if (ret) {
> + com_err(args[0], ret, "while obtaining the debug buffer");
> + return;
> + }
> +
> + out = open_pager(gbls.interactive);
> + fprintf(out, "%s", debug_buffer);
> + close_pager(out);
> + free(debug_buffer);
> +}
> +
> +/*
> + * do_controld()
> + *
> + */
> +static void do_controld(char **args)
> +{
> + if (!args[1])
> + fprintf(stderr, "%s: Operation required\n", args[0]);
> + else if (!strcmp(args[1], "dump"))
> + do_controld_dump(args);
> + else
> + fprintf(stderr, "%s: Invalid operation: \"%s\"\n",
> + args[0], args[1]);
> +}
> +
> +/*
> * do_curdev()
> *
> */
> diff --git a/debugfs.ocfs2/main.c b/debugfs.ocfs2/main.c
> index 3588726..722b339 100644
> --- a/debugfs.ocfs2/main.c
> +++ b/debugfs.ocfs2/main.c
> @@ -1,4 +1,6 @@
> -/*
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> * main.c
> *
> * entry point for debugfs.ocfs2
> @@ -430,6 +432,7 @@ int main (int argc, char **argv)
> dbgfs_opts opts;
> FILE *cmd = NULL;
>
> + initialize_o2cb_error_table();
> initialize_ocfs_error_table();
>
> #define INSTALL_SIGNAL(sig) \
> diff --git a/include/o2cb/o2cb.h b/include/o2cb/o2cb.h
> index d987d20..688817a 100644
> --- a/include/o2cb/o2cb.h
> +++ b/include/o2cb/o2cb.h
> @@ -68,6 +68,8 @@ void o2cb_free_cluster_list(char **clusters);
> errcode_t o2cb_list_nodes(char *cluster_name, char ***nodes);
> void o2cb_free_nodes_list(char **nodes);
>
> +errcode_t o2cb_control_daemon_debug(char **debug);
> +
> struct o2cb_cluster_desc {
> char *c_stack; /* The cluster stack, NULL for classic */
> char *c_cluster; /* The name of the cluster, NULL for the
> diff --git a/include/o2cb/o2cb_client_proto.h b/include/o2cb/o2cb_client_proto.h
> index b459223..99e379e 100644
> --- a/include/o2cb/o2cb_client_proto.h
> +++ b/include/o2cb/o2cb_client_proto.h
> @@ -42,6 +42,7 @@ typedef enum {
> CM_LISTCLUSTERS,
> CM_ITEMCOUNT,
> CM_ITEM,
> + CM_DUMP,
> } client_message;
>
> int client_listen(const char *path);
> diff --git a/libo2cb/client_proto.c b/libo2cb/client_proto.c
> index 0811010..0ebb934 100644
> --- a/libo2cb/client_proto.c
> +++ b/libo2cb/client_proto.c
> @@ -45,6 +45,7 @@ DEFINE_MESSAGE(LISTMOUNTS, 2, "%s %s")
> DEFINE_MESSAGE(LISTCLUSTERS, 0, "")
> DEFINE_MESSAGE(ITEMCOUNT, 1, "%u")
> DEFINE_MESSAGE(ITEM, 1, "%s")
> +DEFINE_MESSAGE(DUMP, 0, "")
> END_MESSAGES(message_list)
>
> const char *message_to_string(client_message message)
> diff --git a/libo2cb/o2cb_abi.c b/libo2cb/o2cb_abi.c
> index f7f0148..7ebfa72 100644
> --- a/libo2cb/o2cb_abi.c
> +++ b/libo2cb/o2cb_abi.c
> @@ -1921,6 +1921,83 @@ void o2cb_free_nodes_list(char **nodes)
> o2cb_free_dir_list(nodes);
> }
>
> +static errcode_t dump_list_to_string(char **dump_list, char **debug)
> +{
> + int i;
> + size_t len, count = 0;
> + char *ptr;
> +
> + for (i = 0; dump_list[i]; i++)
> + count += strlen(dump_list[i]);
> +
> + *debug = malloc(sizeof(char) * (count + 1));
> + if (!*debug)
> + return O2CB_ET_NO_MEMORY;
> +
> + ptr = *debug;
> + ptr[count] = '\0';
> + for (i = 0; dump_list[i]; i++) {
> + len = strlen(dump_list[i]);
> + memcpy(ptr, dump_list[i], len);
> + ptr += len;
> + }
> +
> + return 0;
> +}
> +
> +errcode_t o2cb_control_daemon_debug(char **debug)
> +{
> + errcode_t err = O2CB_ET_SERVICE_UNAVAILABLE;
> + int rc, fd = -1;
> + char buf[OCFS2_CONTROLD_MAXLINE];
> + char **dump_list = NULL;
> +
> + rc = ocfs2_client_connect();
> + if (rc < 0) {
> + /* fprintf(stderr, "Unable to connect to ocfs2_controld: %s\n",
> + strerror(-rc)); */
> + switch (rc) {
> + case -EACCES:
> + case -EPERM:
> + err = O2CB_ET_PERMISSION_DENIED;
> + break;
> +
> + default:
> + err = O2CB_ET_SERVICE_UNAVAILABLE;
> + break;
> + }
> + goto out;
> + }
> + fd = rc;
> +
> + rc = send_message(fd, CM_DUMP);
> + if (rc) {
> + /* fprintf(stderr,
> + "Unable to send DUMP message: %s\n",
> + strerror(-rc)); */
> + err = O2CB_ET_IO;
> + goto out;
> + }
> +
> + rc = receive_list(fd, buf, &dump_list);
> + if (rc) {
> + /* fprintf(stderr, "Error reading from daemon: %s\n",
> + strerror(-rc)); */
> + err = O2CB_ET_IO;
> + goto out;
> + }
> +
> + err = dump_list_to_string(dump_list, debug);
> + o2cb_free_dir_list(dump_list);
> +
> +out:
> + if (fd != -1)
> + close(fd);
> +
> + return err;
> +}
> +
> +
> errcode_t o2cb_get_hb_thread_pid (const char *cluster_name, const char *region_name,
> pid_t *pid)
> {
> diff --git a/ocfs2_controld/main.c b/ocfs2_controld/main.c
> index c16771e..ecff86f 100644
> --- a/ocfs2_controld/main.c
> +++ b/ocfs2_controld/main.c
> @@ -245,44 +245,6 @@ out:
> return rc;
> }
>
> -int do_read(int fd, void *buf, size_t count)
> -{
> - int rv, off = 0;
> -
> - while (off < count) {
> - rv = read(fd, buf + off, count - off);
> - if (rv == 0)
> - return -1;
> - if (rv == -1 && errno == EINTR)
> - continue;
> - if (rv == -1)
> - return -1;
> - off += rv;
> - }
> - return 0;
> -}
> -
> -int do_write(int fd, void *buf, size_t count)
> -{
> - int rv, off = 0;
> -
> - retry:
> - rv = write(fd, buf + off, count);
> - if (rv == -1 && errno == EINTR)
> - goto retry;
> - if (rv < 0) {
> - log_error("write errno %d", errno);
> - return rv;
> - }
> -
> - if (rv != count) {
> - count -= rv;
> - off += rv;
> - goto retry;
> - }
> - return 0;
> -}
> -
> static int do_mount(int ci, int fd, const char *fstype, const char *uuid,
> const char *cluster, const char *device,
> const char *service)
> @@ -406,18 +368,84 @@ int connection_add(int fd, void (*work)(int ci), void (*dead)(int ci))
> return -ELOOP;
> }
>
> -static int dump_debug(int ci)
> +/* 4 characters for "ITEM", 1 for the space, 1 for the '\0' */
> +#define DEBUG_BYTES_PER_ITEM (OCFS2_CONTROLD_MAXLINE - 6)
> +static size_t debug_bytes_to_count(size_t bytes)
> {
> - int len = DUMP_SIZE;
> + return (bytes + DEBUG_BYTES_PER_ITEM - 1) / DEBUG_BYTES_PER_ITEM;
> +}
> +
> +static int send_debug(int fd, const char *ptr, size_t bytes)
> +{
> + int rc = 0;
> + size_t remain = bytes;
> + size_t itemlen = DEBUG_BYTES_PER_ITEM;
> + char itembuf[DEBUG_BYTES_PER_ITEM + 1];
> +
> + while (remain) {
> + if (itemlen > remain)
> + itemlen = remain;
> + memcpy(itembuf, ptr, itemlen);
> + itembuf[itemlen] = '\0';
> + rc = send_message(fd, CM_ITEM, itembuf);
> + if (rc)
> + break;
> +
> + ptr += itemlen;
> + remain -= itemlen;
> + }
> +
> + return rc;
> +}
> +
> +static int dump_debug(int ci, int fd)
> +{
> + int rc, rctmp;
> + char error_msg[100]; /* Arbitrary size smaller than a message */
> + int count = 0;
> +
> + if (dump_wrap)
> + count += debug_bytes_to_count(DUMP_SIZE - dump_point);
> + count += debug_bytes_to_count(dump_point);
> +
> + rc = send_message(fd, CM_ITEMCOUNT, count);
> + if (rc) {
> + snprintf(error_msg, sizeof(error_msg),
> + "Unable to send ITEMCOUNT: %s",
> + strerror(-rc));
> + goto out_status;
> + }
>
> if (dump_wrap) {
> - len = DUMP_SIZE - dump_point;
> - do_write(client[ci].fd, dump_buf + dump_point, len);
> - len = dump_point;
> + rc = send_debug(fd, dump_buf + dump_point,
> + DUMP_SIZE - dump_point);
> + if (rc) {
> + snprintf(error_msg, sizeof(error_msg),
> + "Unable to send ITEM: %s",
> + strerror(-rc));
> + goto out_status;
> + }
> + }
> + rc = send_debug(fd, dump_buf, dump_point);
> + if (rc) {
> + snprintf(error_msg, sizeof(error_msg),
> + "Unable to send ITEM: %s",
> + strerror(-rc));
> + goto out_status;
> }
>
> - do_write(client[ci].fd, dump_buf, len);
> - return 0;
> + strcpy(error_msg, "OK");
> +
> +out_status:
> + rctmp = send_message(fd, CM_STATUS, -rc, error_msg);
> + if (rctmp) {
> + log_error("Error sending STATUS message: %s",
> + strerror(-rc));
> + if (!rc)
> + rc = rctmp;
> + }
> +
> + return rc;
> }
>
> static int send_filesystems(int ci, int fd, const char *fstype,
> @@ -540,6 +568,10 @@ static void process_client(int ci)
> rv = send_filesystems(ci, fd, argv[0], argv[1]);
> break;
>
> + case CM_DUMP:
> + rv = dump_debug(ci, fd);
> + break;
> +
> case CM_STATUS:
> log_error("Someone sent us cm_status!");
> break;
> @@ -548,20 +580,6 @@ static void process_client(int ci)
> log_error("Invalid message received");
> break;
> }
> -#if 0
> - if (daemon_debug_opt)
> - dump_state();
> -#endif
> -
> -#if 0
> - } else if (!strcmp(cmd, "dump")) {
> - dump_debug(ci);
> -
> - } else {
> - rv = -EINVAL;
> - goto reply;
> - }
> -#endif
>
> return;
> }
>
More information about the Ocfs2-tools-devel
mailing list