[Ocfs2-tools-devel] [PATCH 1/1] o2hbmonitor: Disk heartbeat monitor

tristan tristan.ye at oracle.com
Wed Oct 27 04:35:08 PDT 2010


Hi Sunil,

Some trivial comments inline; they are not really about the program's logic ;)

Sunil Mushran wrote:
> o2hbmonitor is a very light utility that monitors o2cb disk heartbeat.
> It periodically reads o2hb debugfs file, elapsed_time_in_ms, and checks
> whether the time is greater than the warn threshold. if so, it prints a
> message in syslog.
>
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
>  Makefile                                  |    2 +-
>  o2monitor/.gitignore                      |    3 +
>  o2monitor/Makefile                        |   26 ++
>  o2monitor/o2hbmonitor.c                   |  430 +++++++++++++++++++++++++++++
>  vendor/common/ocfs2-tools.spec-generic.in |    1 +
>  5 files changed, 461 insertions(+), 1 deletions(-)
>  create mode 100644 o2monitor/.gitignore
>  create mode 100644 o2monitor/Makefile
>  create mode 100644 o2monitor/o2hbmonitor.c
>
> diff --git a/Makefile b/Makefile
> index 88106fb..65c13f9 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -20,7 +20,7 @@ CHKCONFIG_DEP = chkconfig
>  COMPILE_PY = 1
>  endif
>  
> -SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image listuuid sizetest extras fswreck patches
> +SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image o2monitor listuuid sizetest extras fswreck patches
>  
>  ifdef BUILD_OCFS2CONSOLE
>  SUBDIRS += ocfs2console
> diff --git a/o2monitor/.gitignore b/o2monitor/.gitignore
> new file mode 100644
> index 0000000..323fba6
> --- /dev/null
> +++ b/o2monitor/.gitignore
> @@ -0,0 +1,3 @@
> +.*.sw?
> +*.d
> +o2hbmonitor
> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
> new file mode 100644
> index 0000000..961eafa
> --- /dev/null
> +++ b/o2monitor/Makefile
> @@ -0,0 +1,26 @@
> +TOPDIR = ..
> +
> +include $(TOPDIR)/Preamble.make
> +
> +sbindir = $(root_sbindir)
> +SBIN_PROGRAMS = o2hbmonitor
> +
> +WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
> +           -Wmissing-declarations
> +
> +CFLAGS = $(OPTS) $(WARNINGS)
> +
> +INCLUDES = -I$(TOPDIR)/include -I.
> +
> +DEFINES = -DVERSION=\"$(VERSION)\"
> +
> +CFILES = o2hbmonitor.c
> +
> +OBJS = $(subst .c,.o,$(CFILES))
> +
> +DIST_FILES = $(CFILES) $(HFILES)
> +
> +o2hbmonitor: $(OBJS)
> +	$(LINK)
> +
> +include $(TOPDIR)/Postamble.make
> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
> new file mode 100644
> index 0000000..dd74f44
> --- /dev/null
> +++ b/o2monitor/o2hbmonitor.c
> @@ -0,0 +1,430 @@
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> + * o2hbmonitor.c
> + *
> + * Monitors o2hb
> + *
> + * Copyright (C) 2010 Oracle.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + * 
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +/*
> + * This utility requires the o2hb debugfs file elapsed_time_in_ms which shows
> + * the time since the o2hb heartbeat timer was last armed.  This file was added
> + * in the mainline kernel via commit 43695d095dfaf266a8a940d9b07eed7f46076b49.
> + *
> + * This utility scans the configfs to load all the heartbeat regions and the
> + * dead_threshold. If no cluster is found, it checks after CONFIG_POLL_IN_SECS.
> + *
> + * The debugfs file, elapsed_time_in_ms, for each region is scanned every
> + * SLOW_POLL_IN_SECS. If a region is found to have an elpased time > warn
> + * threshold, a message is logged to syslog.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +#include <linux/types.h>
> +#include <sys/stat.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <string.h>
> +#include <libgen.h>
> +#include <syslog.h>
> +#include <errno.h>
> +
> +#define SYS_CONFIG_DIR			"/sys/kernel/config"
> +#define O2HB_CLUSTER_DIR		SYS_CONFIG_DIR"/cluster"
> +#define O2HB_HEARTBEAT_DIR		O2HB_CLUSTER_DIR"/%s/heartbeat"
> +#define O2HB_DEAD_THRESHOLD		O2HB_HEARTBEAT_DIR"/dead_threshold"
> +#define O2HB_DEVICE			O2HB_HEARTBEAT_DIR"/%s/dev"
> +
> +#define SYS_DEBUG_DIR			"/sys/kernel/debug"
> +#define O2HB_DEBUG_DIR			SYS_DEBUG_DIR"/o2hb"
> +#define O2HB_ELAPSED_TIME		O2HB_DEBUG_DIR"/%s/elapsed_time_in_ms"
> +
> +#define MAX_REGIONS			200
> +
> +#define DEAD_THRESHOLD_IN_MSECS(a)	(((a) - 1) * 2000)
> +#define WARN_THRESHOLD_PERCENT		50
> +
> +#define CONFIG_POLL_IN_SECS		60
> +#define SLOW_POLL_IN_SECS		10
> +#define FAST_POLL_IN_SECS		2
> +
> +char *progname;
> +int interactive;
> +int warn_threshold_percent;
> +int verbose;
> +
> +struct o2hb_region {
> +	char *name;
> +	char *device;
> +	unsigned long elapsed_in_ms;
> +};
> +
> +struct o2hb_config {
> +	char *cluster_name;
> +	unsigned long dead_threshold_in_ms;
> +	unsigned long warn_threshold_in_ms;
> +	unsigned long poll_in_secs;
> +	int num_regions;
> +	struct o2hb_region regions[MAX_REGIONS];
> +};
> +
> +static void show_version(void)
> +{
> +	fprintf(stderr, "%s %s\n", progname, VERSION);
> +}
> +
> +static void free_config(struct o2hb_config **oc)
> +{
> +	int i;
> +
> +	if (!*oc)
> +		return;
> +
> +	if ((*oc)->cluster_name)
> +		free((*oc)->cluster_name);
> +
> +	for (i = 0; i < (*oc)->num_regions; ++i) {
> +		if ((*oc)->regions[i].name)
> +			free((*oc)->regions[i].name);
> +		if ((*oc)->regions[i].device)
> +			free((*oc)->regions[i].device);
> +	}
> +
> +	free(*oc);
> +	*oc = NULL;
> +}


I guess we'd better have a signal handler in the program to
deal with SIGINT/SIGTERM/SIGKILL correctly and do some cleanup,
such as free_config(), especially for a daemon, which normally
is going to be killed by an arbitrary 'kill -9'.


> +
> +static int alloc_config(struct o2hb_config **oc)
> +{
> +	*oc = calloc(sizeof(struct o2hb_config), 1);
> +	if (!*oc)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static char *do_strchomp(char *str)
> +{
> +	int len = strlen(str);
> +	char *p;
> +
> +	if (!len)
> +		return str;
> +
> +	p = str + len - 1;

I guess you're trying to remove all trailing whitespace here,
so if the string ends with a non-whitespace char, then
we're safe to return early:

if (!isspace(*p))
return str;

> +	while ((isspace(*p) || *p == '\n') && len--)

Also, there is a corner-case bug when the target string consists of
nothing but whitespace: when 'len' reaches 0, '*p' actually accesses
an invalid byte located before the start of the string. It should be:

+	while ((len--) && (isspace(*p) || (*p == '\n')))



> +		*p-- = '\0';
> +
> +	return str;
> +}
> +
> +static int get_value(char *path, char *value, int count)
> +{
> +	int fd = -1, ret = -1;
> +	char *p = value;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd > 0)
> +		ret = read(fd, value, count);

else
return ret;

> +	if (ret > 0) {
> +		p += ret;
> +		*p = '\0';
> +		ret = 0;
> +	}
> +
> +	if (!ret)
> +		do_strchomp(value);
> +
> +	if (fd > -1)
> +		close(fd);
> +	return ret;
> +}
> +
> +static int populate_elapsed_time(struct o2hb_config *oc)
> +{
> +	int i, ret;

ret = -1;

> +	char val[32];
> +	char path[PATH_MAX];
> +	struct o2hb_region *reg;
> +
> +	for (i = 0; i < oc->num_regions; ++i) {
> +		reg = &(oc->regions[i]);
> +		sprintf(path, O2HB_ELAPSED_TIME, reg->name);
> +		ret = get_value(path, val, sizeof(val));
> +		if (ret)
> +			goto bail;
> +		reg->elapsed_in_ms = strtoul(val, NULL, 0);
> +	}
> +
> +	ret = 0;
> +
> +bail:
> +	return ret;
> +
> +}
> +
> +static int populate_thresholds(struct o2hb_config *oc)
> +{
> +	int ret;
> +	char val[32];
> +	char path[PATH_MAX];
> +
> +	sprintf(path, O2HB_DEAD_THRESHOLD, oc->cluster_name);
> +	ret = get_value(path, val, sizeof(val));
> +	if (!ret) {
> +		oc->dead_threshold_in_ms =
> +			DEAD_THRESHOLD_IN_MSECS(strtoul(val, NULL, 0));
> +		oc->warn_threshold_in_ms =
> +			(oc->dead_threshold_in_ms * warn_threshold_percent / 100);
> +	}
> +
> +	return ret;
> +}
> +
> +static int populate_devices(struct o2hb_config *oc)
> +{
> +	int i, ret;
> +	char val[255];
> +	char path[PATH_MAX];
> +	struct o2hb_region *reg;
> +
> +	for (i = 0; i < oc->num_regions; ++i) {
> +		reg = &(oc->regions[i]);
> +		sprintf(path, O2HB_DEVICE, oc->cluster_name, reg->name);
> +		ret = get_value(path, val, sizeof(val));
> +		if (ret)
> +			goto bail;
> +		reg->device = strdup(val);
> +		if (!reg->device) {
> +			ret = -1;
> +			goto bail;
> +		}
> +	}
> +
> +	ret = 0;
> +
> +bail:
> +	return ret;
> +}
> +
> +static int populate_regions(struct o2hb_config *oc)
> +{
> +	int ret = -1;
> +	DIR *dir = NULL;
> +	struct dirent *ent;
> +	struct o2hb_region *reg;
> +	char path[PATH_MAX];
> +
> +	sprintf(path, O2HB_HEARTBEAT_DIR, oc->cluster_name);
> +
> +	dir = opendir(path);
> +	if (!dir)
> +		goto bail;
> +
> +	do {
> +		ent = readdir(dir);
> +
> +		if (ent && ent->d_type == 4 && strcmp(ent->d_name, ".") &&
> +		    strcmp(ent->d_name, "..")) {
> +			reg = &(oc->regions[oc->num_regions]);
> +			reg->name = strdup(ent->d_name);
> +			if (!reg->name)
> +				goto bail;
> +			++oc->num_regions;
> +		}
> +	} while (ent);


/* in case the directory was empty, except for '.' and '..' */
if (!oc->num_regions)
goto bail;


> +
> +	ret = 0;
> +
> +bail:
> +	if (dir)
> +		closedir(dir);
> +	return ret;
> +}
> +
> +static int populate_cluster_name(struct o2hb_config *oc)
> +{
> +	DIR *dir;
> +	struct dirent *ent;
> +
> +	dir = opendir(O2HB_CLUSTER_DIR);
> +	if (!dir)
> +		goto bail;
> +
> +	do {
> +		ent = readdir(dir);
> +		if (ent && ent->d_type == 4 && strcmp(ent->d_name, ".") &&
> +		    strcmp(ent->d_name, "..")) {
> +			oc->cluster_name = strdup(ent->d_name);
> +			break;
> +		}
> +	} while (ent);
> +
> +	closedir(dir);
> +
> +bail:
> +	if (!oc->cluster_name)
> +		return -1;
> +	return 0;
> +}
> +
> +/*
> + * Reads config for all the heartbeat regions
> + */
> +static int load_config(struct o2hb_config **oc)
> +{
> +	struct stat buf;
> +	int ret;
> +	static int warn_count = 0;
> +
> +	ret = stat(O2HB_CLUSTER_DIR, &buf);
> +	if (ret)
> +		goto bail;
> +
> +	ret = stat(O2HB_DEBUG_DIR, &buf);
> +	if (ret) {
> +		if (!(warn_count++ % 60))
> +			syslog(LOG_WARNING,
> +			       "mount debugfs at /sys/kernel/debug");
> +		goto bail;
> +	}
> +
> +	ret = alloc_config(oc);
> +	if (!ret)
> +		ret = populate_cluster_name(*oc);
> +	if (!ret)
> +		ret = populate_thresholds(*oc);
> +	if (!ret)
> +		ret = populate_regions(*oc);
> +	if (!ret)
> +		ret = populate_devices(*oc);
> +
> +bail:
> +	return ret;
> +}
> +
> +static void scan_elapsed_time(struct o2hb_config *oc)
> +{
> +	int i;
> +	struct o2hb_region *reg;
> +
> +	oc->poll_in_secs = SLOW_POLL_IN_SECS;
> +	for (i = 0; i < oc->num_regions; ++i) {
> +		reg = &(oc->regions[i]);
> +		if (verbose)
> +			fprintf(stdout, "%s, %s, %lu\n", reg->name, reg->device,
> +				reg->elapsed_in_ms);
> +		if (reg->elapsed_in_ms < oc->warn_threshold_in_ms)
> +			continue;
> +		oc->poll_in_secs = FAST_POLL_IN_SECS;
> +		syslog(LOG_WARNING, "Last ping %lu msecs ago on /dev/%s, %s\n",
> +		       reg->elapsed_in_ms, reg->device, reg->name);
> +	}
> +}
> +
> +static void monitor(void)
> +{
> +	int ret = -1;
> +	struct o2hb_config *oc = NULL;
> +
> +	while (1) {
> +		if (!oc) {
> +			ret = load_config(&oc);
> +			if (ret) {
> +				free_config(&oc);
> +				sleep(CONFIG_POLL_IN_SECS);
> +				continue;
> +			}
> +		}
> +
> +		ret = populate_elapsed_time(oc);
> +		if (ret) {
> +			free_config(&oc);
> +			continue;
> +		}
> +
> +		scan_elapsed_time(oc);
> +
> +		sleep(oc->poll_in_secs);
> +	}
> +}
> +
> +static void usage(void)
> +{
> +	fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
> +	fprintf(stderr, "\t -w, Warn threshold percent (default 50%%)\n");
> +	fprintf(stderr, "\t -i, Interactive\n");
> +	fprintf(stderr, "\t -v, Verbose\n");
> +	fprintf(stderr, "\t -V, Version\n");
> +	exit(1);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int c, ret, version = 0;
> +
> +	/* init globals */
> +	progname = basename(argv[0]);
> +	interactive = 0;
> +	warn_threshold_percent = WARN_THRESHOLD_PERCENT;
> +	verbose = 0;
> +
> +	while (1) {
> +		c = getopt(argc, argv, "w:i?hvV");
> +		if (c == -1)
> +			break;
> +		switch (c) {
> +		case 'i':
> +			interactive = 1;
> +			break;
> +		case 'v':
> +			++verbose;
> +			break;
> +		case 'w':
> +			warn_threshold_percent = strtoul(optarg, NULL, 0);
> +			if (warn_threshold_percent < 1 ||
> +			    warn_threshold_percent > 99)
> +				warn_threshold_percent = WARN_THRESHOLD_PERCENT;
> +			break;
> +		case 'V':
> +			version = 1;
> +			break;
> +		case '?':
> +		case 'h':
> +		default:
> +			usage();
> +			break;
> +		}
> +	}
> +
> +	if (version)
> +		show_version();
> +
> +	if (!interactive) {
> +		ret = daemon(0, verbose);
> +		if (ret)
> +			fprintf(stderr, "Unable to daemonize, %s\n",
> +				strerror(errno));
> +	}
> +
> +	openlog(progname, LOG_CONS|LOG_NDELAY, LOG_DAEMON);
> +	monitor();
> +	closelog();
> +
> +	return 0;
> +}
> diff --git a/vendor/common/ocfs2-tools.spec-generic.in b/vendor/common/ocfs2-tools.spec-generic.in
> index 3e9b46a..44a65cd 100644
> --- a/vendor/common/ocfs2-tools.spec-generic.in
> +++ b/vendor/common/ocfs2-tools.spec-generic.in
> @@ -118,6 +118,7 @@ fi
>  /sbin/o2cb_ctl
>  /sbin/mount.ocfs2
>  /sbin/o2image
> +/usr/sbin/o2hbmonitor
>  /sbin/ocfs2_hb_ctl
>  /etc/init.d/o2cb
>  /etc/init.d/ocfs2

This light utility just lacks a mechanism to emit detailed logs if the
user wants them. I mean, not to pollute the syslog, but just to let the
user know whether syscalls in the program such as open/read/readdir
succeeded or not. A tricky way is to output this kind of log via a
socket, sending the logs over UDP to localhost; it is then up to the
user to listen or not. Having said that, we would also get a good
chance to avoid launching 2 or more 'o2hbmonitor' daemons on the same
node, by checking whether a UNIQUE port is already in use by the
daemon. We usually don't want to run 2 'o2hbmonitor' instances at the
same time, do we?


Tristan.



More information about the Ocfs2-tools-devel mailing list