[Ocfs2-tools-devel] [PATCH] ocfs2_controld, debugfs.ocfs2: Dump the daemon debug buffer via debugfs.ocfs2.

Sunil Mushran Sunil.Mushran at oracle.com
Thu Aug 14 13:39:31 PDT 2008


Very cool.

Joel Becker wrote:
> ocfs2_controld maintains a circular buffer of debug messages.  Until
> now, the only way to see it was to run in the foreground via the -D
> switch.  That's annoying to have to set up, and not useful in a
> production environment.
>
> We introduce the DUMP message to the controld protocol.  This message
> requests the entire debug buffer.  ocfs2_controld sends the buffer as a
> LIST reply.  Each ITEM in the list is one OCFS2_CONTROLD_MAXLINE packet.
> Thus, the ITEMCOUNT is (roughly) "size of dump buffer /
> OCFS2_CONTROLD_MAXLINE".
>
> libo2cb gains the o2cb_control_daemon_debug() API.  This function sends
> the DUMP message, receives the list, and returns a single string with
> the entire buffer concatenated back together.
>
> debugfs.ocfs2 gains the "controld" command.  Currently it only has one
> sub-operation, "dump".  This calls o2cb_control_daemon_debug(), then
> prints the results via the pager.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
>  debugfs.ocfs2/commands.c         |   44 ++++++++++++-
>  debugfs.ocfs2/main.c             |    5 +-
>  include/o2cb/o2cb.h              |    2 +
>  include/o2cb/o2cb_client_proto.h |    1 +
>  libo2cb/client_proto.c           |    1 +
>  libo2cb/o2cb_abi.c               |   77 +++++++++++++++++++++
>  ocfs2_controld/main.c            |  136 +++++++++++++++++++++----------------
>  7 files changed, 205 insertions(+), 61 deletions(-)
>
> diff --git a/debugfs.ocfs2/commands.c b/debugfs.ocfs2/commands.c
> index 0f67b99..83cb5b6 100644
> --- a/debugfs.ocfs2/commands.c
> +++ b/debugfs.ocfs2/commands.c
> @@ -1,4 +1,6 @@
> -/*
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
>   * commands.c
>   *
>   * handles debugfs commands
> @@ -70,6 +72,7 @@ static void do_fs_locks (char **args);
>  static void do_bmap (char **args);
>  static void do_icheck (char **args);
>  static void do_dlm_locks (char **args);
> +static void do_controld(char **args);
>  
>  dbgfs_gbls gbls;
>  
> @@ -79,6 +82,7 @@ static Command commands[] = {
>  	{ "cd",		do_cd },
>  	{ "chroot",	do_chroot },
>  	{ "close",	do_close },
> +	{ "controld",   do_controld },
>  	{ "curdev",	do_curdev },
>  	{ "dlm_locks",	do_dlm_locks },
>  	{ "dump",	do_dump },
> @@ -824,6 +828,7 @@ static void do_help (char **args)
>  	printf ("cd <filespec>\t\t\t\tChange directory\n");
>  	printf ("chroot <filespec>\t\t\tChange root\n");
>  	printf ("close\t\t\t\t\tClose a device\n");
> +	printf ("controld dump\t\t\tObtain information from ocfs2_controld\n");
>  	printf ("curdev\t\t\t\t\tShow current device\n");
>  	printf ("decode <lockname#> ...\t\t\tDecode block#(s) from the lockname(s)\n");
>  	printf ("dlm_locks [-l] lockname\t\t\tShow live dlm locking state\n");
> @@ -891,6 +896,43 @@ static void do_lcd (char **args)
>  }
>  
>  /*
> + * do_controld_dump()
> + *
> + */
> +static void do_controld_dump(char **args)
> +{
> +	FILE *out;
> +	errcode_t ret;
> +	char *debug_buffer;
> +
> +	ret = o2cb_control_daemon_debug(&debug_buffer);
> +	if (ret) {
> +		com_err(args[0], ret, "while obtaining the debug buffer");
> +		return;
> +	}
> +
> +	out = open_pager(gbls.interactive);
> +	fprintf(out, "%s", debug_buffer);
> +	close_pager(out);
> +	free(debug_buffer);
> +}
> +
> +/*
> + * do_controld()
> + *
> + */
> +static void do_controld(char **args)
> +{
> +	if (!args[1])
> +		fprintf(stderr, "%s: Operation required\n", args[0]);
> +	else if (!strcmp(args[1], "dump"))
> +		do_controld_dump(args);
> +	else
> +		fprintf(stderr, "%s: Invalid operation: \"%s\"\n",
> +			args[0], args[1]);
> +}
> +
> +/*
>   * do_curdev()
>   *
>   */
> diff --git a/debugfs.ocfs2/main.c b/debugfs.ocfs2/main.c
> index 3588726..722b339 100644
> --- a/debugfs.ocfs2/main.c
> +++ b/debugfs.ocfs2/main.c
> @@ -1,4 +1,6 @@
> -/*
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
>   * main.c
>   *
>   * entry point for debugfs.ocfs2
> @@ -430,6 +432,7 @@ int main (int argc, char **argv)
>  	dbgfs_opts opts;
>  	FILE *cmd = NULL;
>  
> +	initialize_o2cb_error_table();
>  	initialize_ocfs_error_table();
>  
>  #define INSTALL_SIGNAL(sig)					\
> diff --git a/include/o2cb/o2cb.h b/include/o2cb/o2cb.h
> index d987d20..688817a 100644
> --- a/include/o2cb/o2cb.h
> +++ b/include/o2cb/o2cb.h
> @@ -68,6 +68,8 @@ void o2cb_free_cluster_list(char **clusters);
>  errcode_t o2cb_list_nodes(char *cluster_name, char ***nodes);
>  void o2cb_free_nodes_list(char **nodes);
>  
> +errcode_t o2cb_control_daemon_debug(char **debug);
> +
>  struct o2cb_cluster_desc {
>  	char *c_stack;		/* The cluster stack, NULL for classic */
>  	char *c_cluster;	/* The name of the cluster, NULL for the
> diff --git a/include/o2cb/o2cb_client_proto.h b/include/o2cb/o2cb_client_proto.h
> index b459223..99e379e 100644
> --- a/include/o2cb/o2cb_client_proto.h
> +++ b/include/o2cb/o2cb_client_proto.h
> @@ -42,6 +42,7 @@ typedef enum {
>  	CM_LISTCLUSTERS,
>  	CM_ITEMCOUNT,
>  	CM_ITEM,
> +	CM_DUMP,
>  } client_message;
>  
>  int client_listen(const char *path);
> diff --git a/libo2cb/client_proto.c b/libo2cb/client_proto.c
> index 0811010..0ebb934 100644
> --- a/libo2cb/client_proto.c
> +++ b/libo2cb/client_proto.c
> @@ -45,6 +45,7 @@ DEFINE_MESSAGE(LISTMOUNTS, 2, "%s %s")
>  DEFINE_MESSAGE(LISTCLUSTERS, 0, "")
>  DEFINE_MESSAGE(ITEMCOUNT, 1, "%u")
>  DEFINE_MESSAGE(ITEM, 1, "%s")
> +DEFINE_MESSAGE(DUMP, 0, "")
>  END_MESSAGES(message_list)
>  
>  const char *message_to_string(client_message message)
> diff --git a/libo2cb/o2cb_abi.c b/libo2cb/o2cb_abi.c
> index f7f0148..7ebfa72 100644
> --- a/libo2cb/o2cb_abi.c
> +++ b/libo2cb/o2cb_abi.c
> @@ -1921,6 +1921,83 @@ void o2cb_free_nodes_list(char **nodes)
>  	o2cb_free_dir_list(nodes);
>  }
>  
> +static errcode_t dump_list_to_string(char **dump_list, char **debug)
> +{
> +	int i;
> +	size_t len, count = 0;
> +	char *ptr;
> +
> +	for (i = 0; dump_list[i]; i++)
> +		count += strlen(dump_list[i]);
> +
> +	*debug = malloc(sizeof(char) * (count + 1));
> +	if (!*debug)
> +		return O2CB_ET_NO_MEMORY;
> +
> +	ptr = *debug;
> +	ptr[count] = '\0';
> +	for (i = 0; dump_list[i]; i++) {
> +		len = strlen(dump_list[i]);
> +		memcpy(ptr, dump_list[i], len);
> +		ptr += len;
> +	}
> +
> +	return 0;
> +}
> +
> +errcode_t o2cb_control_daemon_debug(char **debug)
> +{
> +	errcode_t err = O2CB_ET_SERVICE_UNAVAILABLE;
> +	int rc, fd = -1;
> +	char buf[OCFS2_CONTROLD_MAXLINE];
> +	char **dump_list = NULL;
> +
> +	rc = ocfs2_client_connect();
> +	if (rc < 0) {
> +		/* fprintf(stderr, "Unable to connect to ocfs2_controld: %s\n",
> +			strerror(-rc)); */
> +		switch (rc) {
> +			case -EACCES:
> +			case -EPERM:
> +				err = O2CB_ET_PERMISSION_DENIED;
> +				break;
> +
> +			default:
> +				err = O2CB_ET_SERVICE_UNAVAILABLE;
> +				break;
> +		}
> +		goto out;
> +	}
> +	fd = rc;
> +
> +	rc = send_message(fd, CM_DUMP);
> +	if (rc) {
> +		/* fprintf(stderr,
> +			"Unable to send DUMP message: %s\n",
> +			strerror(-rc)); */
> +		err = O2CB_ET_IO;
> +		goto out;
> +	}
> +
> +	rc = receive_list(fd, buf, &dump_list);
> +	if (rc) {
> +		/* fprintf(stderr, "Error reading from daemon: %s\n",
> +			strerror(-rc)); */
> +		err = O2CB_ET_IO;
> +		goto out;
> +	}
> +
> +	err = dump_list_to_string(dump_list, debug);
> +	o2cb_free_dir_list(dump_list);
> +
> +out:
> +	if (fd != -1)
> +		close(fd);
> +
> +	return err;
> +}
> +
> +
>  errcode_t o2cb_get_hb_thread_pid (const char *cluster_name, const char *region_name,
>  			   pid_t *pid)
>  {
> diff --git a/ocfs2_controld/main.c b/ocfs2_controld/main.c
> index c16771e..ecff86f 100644
> --- a/ocfs2_controld/main.c
> +++ b/ocfs2_controld/main.c
> @@ -245,44 +245,6 @@ out:
>  	return rc;
>  }
>  
> -int do_read(int fd, void *buf, size_t count)
> -{
> -	int rv, off = 0;
> -
> -	while (off < count) {
> -		rv = read(fd, buf + off, count - off);
> -		if (rv == 0)
> -			return -1;
> -		if (rv == -1 && errno == EINTR)
> -			continue;
> -		if (rv == -1)
> -			return -1;
> -		off += rv;
> -	}
> -	return 0;
> -}
> -
> -int do_write(int fd, void *buf, size_t count)
> -{
> -	int rv, off = 0;
> -
> - retry:
> -	rv = write(fd, buf + off, count);
> -	if (rv == -1 && errno == EINTR)
> -		goto retry;
> -	if (rv < 0) {
> -		log_error("write errno %d", errno);
> -		return rv;
> -	}
> -
> -	if (rv != count) {
> -		count -= rv;
> -		off += rv;
> -		goto retry;
> -	}
> -	return 0;
> -}
> -
>  static int do_mount(int ci, int fd, const char *fstype, const char *uuid,
>  		    const char *cluster, const char *device,
>  		    const char *service)
> @@ -406,18 +368,84 @@ int connection_add(int fd, void (*work)(int ci), void (*dead)(int ci))
>  	return -ELOOP;
>  }
>  
> -static int dump_debug(int ci)
> +/* 4 characters for "ITEM", 1 for the space, 1 for the '\0' */
> +#define DEBUG_BYTES_PER_ITEM	(OCFS2_CONTROLD_MAXLINE - 6)
> +static size_t debug_bytes_to_count(size_t bytes)
>  {
> -	int len = DUMP_SIZE;
> +	return (bytes + DEBUG_BYTES_PER_ITEM - 1) / DEBUG_BYTES_PER_ITEM;
> +}
> +
> +static int send_debug(int fd, const char *ptr, size_t bytes)
> +{
> +	int rc = 0;
> +	size_t remain = bytes;
> +	size_t itemlen = DEBUG_BYTES_PER_ITEM;
> +	char itembuf[DEBUG_BYTES_PER_ITEM + 1];
> +
> +	while (remain) {
> +		if (itemlen > remain)
> +			itemlen = remain;
> +		memcpy(itembuf, ptr, itemlen);
> +		itembuf[itemlen] = '\0';
> +		rc = send_message(fd, CM_ITEM, itembuf);
> +		if (rc)
> +			break;
> +
> +		ptr += itemlen;
> +		remain -= itemlen;
> +	}
> +
> +	return rc;
> +}
> +
> +static int dump_debug(int ci, int fd)
> +{
> +	int rc, rctmp;
> +	char error_msg[100];  /* Arbitrary size smaller than a message */
> +	int count = 0;
> +
> +	if (dump_wrap)
> +		count += debug_bytes_to_count(DUMP_SIZE - dump_point);
> +	count += debug_bytes_to_count(dump_point);
> +
> +	rc = send_message(fd, CM_ITEMCOUNT, count);
> +	if (rc) {
> +		snprintf(error_msg, sizeof(error_msg),
> +			 "Unable to send ITEMCOUNT: %s",
> +			 strerror(-rc));
> +		goto out_status;
> +	}
>  
>  	if (dump_wrap) {
> -		len = DUMP_SIZE - dump_point;
> -		do_write(client[ci].fd, dump_buf + dump_point, len);
> -		len = dump_point;
> +		rc = send_debug(fd, dump_buf + dump_point,
> +				DUMP_SIZE - dump_point);
> +		if (rc) {
> +			snprintf(error_msg, sizeof(error_msg),
> +				 "Unable to send ITEM: %s",
> +				 strerror(-rc));
> +			goto out_status;
> +		}
> +	}
> +	rc = send_debug(fd, dump_buf, dump_point);
> +	if (rc) {
> +		snprintf(error_msg, sizeof(error_msg),
> +			 "Unable to send ITEM: %s",
> +			 strerror(-rc));
> +		goto out_status;
>  	}
>  
> -	do_write(client[ci].fd, dump_buf, len);
> -	return 0;
> +	strcpy(error_msg, "OK");
> +
> +out_status:
> +	rctmp = send_message(fd, CM_STATUS, -rc, error_msg);
> +	if (rctmp) {
> +		log_error("Error sending STATUS message: %s",
> +			  strerror(-rc));
> +		if (!rc)
> +			rc = rctmp;
> +	}
> +
> +	return rc;
>  }
>  
>  static int send_filesystems(int ci, int fd, const char *fstype,
> @@ -540,6 +568,10 @@ static void process_client(int ci)
>  		rv = send_filesystems(ci, fd, argv[0], argv[1]);
>  		break;
>  
> +		case CM_DUMP:
> +		rv = dump_debug(ci, fd);
> +		break;
> +
>  		case CM_STATUS:
>  		log_error("Someone sent us cm_status!");
>  		break;
> @@ -548,20 +580,6 @@ static void process_client(int ci)
>  		log_error("Invalid message received");
>  		break;
>  	}
> -#if 0
> -	if (daemon_debug_opt)
> -		dump_state();
> -#endif
> -
> -#if 0
> -	} else if (!strcmp(cmd, "dump")) {
> -		dump_debug(ci);
> -
> -	} else {
> -		rv = -EINVAL;
> -		goto reply;
> -	}
> -#endif
>  
>  	return;
>  }
>   




More information about the Ocfs2-tools-devel mailing list