[Ocfs2-tools-devel] [PATCH 1/2] o2hbmonitor: Disk heartbeat monitor

Tristan Ye tristan.ye at oracle.com
Thu Jan 6 18:29:47 PST 2011


The patch looks good to me, except for the following trivial comments ;-)


Srinivas Eeda wrote:
> From: Sunil Mushran <sunil.mushran at oracle.com>
>
> o2hbmonitor monitors o2cb disk heartbeat. It periodically reads o2hb
> debugfs file, elapsed_time_in_ms, and checks whether the time is greater
> than the warn threshold. if so, it prints a message in syslog.
>
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
>  Makefile                                  |    2 +-
>  o2monitor/.gitignore                      |    3 +
>  o2monitor/Makefile                        |   26 ++
>  o2monitor/o2hbmonitor.c                   |  367 +++++++++++++++++++++++++++++
>  vendor/common/ocfs2-tools.spec-generic.in |    1 +

    To make things complete, you may also want to add the corresponding
entries to the following files:

 CREDITS                                   |    3 +
 debian/ocfs2-tools.install                |    2 +


>  5 files changed, 398 insertions(+), 1 deletions(-)
>  create mode 100644 o2monitor/.gitignore
>  create mode 100644 o2monitor/Makefile
>  create mode 100644 o2monitor/o2hbmonitor.c
>
> diff --git a/Makefile b/Makefile
> index 8b71e72..63d5bd1 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -20,7 +20,7 @@ CHKCONFIG_DEP = chkconfig
>  COMPILE_PY = 1
>  endif
>  
> -SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image o2info listuuid sizetest extras fswreck patches
> +SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image o2info o2monitor listuuid sizetest extras fswreck patches
>  
>  ifdef BUILD_OCFS2CONSOLE
>  SUBDIRS += ocfs2console
> diff --git a/o2monitor/.gitignore b/o2monitor/.gitignore
> new file mode 100644
> index 0000000..323fba6
> --- /dev/null
> +++ b/o2monitor/.gitignore
> @@ -0,0 +1,3 @@
> +.*.sw?
> +*.d
> +o2hbmonitor
> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
> new file mode 100644
> index 0000000..961eafa
> --- /dev/null
> +++ b/o2monitor/Makefile
> @@ -0,0 +1,26 @@
> +TOPDIR = ..
> +
> +include $(TOPDIR)/Preamble.make
> +
> +sbindir = $(root_sbindir)
> +SBIN_PROGRAMS = o2hbmonitor
> +
> +WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
> +           -Wmissing-declarations
> +
> +CFLAGS = $(OPTS) $(WARNINGS)
> +
> +INCLUDES = -I$(TOPDIR)/include -I.
> +
> +DEFINES = -DVERSION=\"$(VERSION)\"
> +
> +CFILES = o2hbmonitor.c
> +
> +OBJS = $(subst .c,.o,$(CFILES))
> +
> +DIST_FILES = $(CFILES) $(HFILES)
> +
> +o2hbmonitor: $(OBJS)
> +	$(LINK)
> +
> +include $(TOPDIR)/Postamble.make
> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
> new file mode 100644
> index 0000000..5bb5590
> --- /dev/null
> +++ b/o2monitor/o2hbmonitor.c
> @@ -0,0 +1,367 @@
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> + * o2hbmonitor.c
> + *
> + * Monitors o2hb
> + *
> + * Copyright (C) 2010 Oracle.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +/*
> + * This utility requires the o2hb debugfs file elapsed_time_in_ms which shows
> + * the time since the o2hb heartbeat timer was last armed.  This file was added
> + * in the mainline kernel via commit 43695d095dfaf266a8a940d9b07eed7f46076b49.
> + *
> + * This utility scans the configfs to see if the cluster is up. If not up, it
> + * checks again after CONFIG_POLL_IN_SECS.
> + *
> + * If up, it loads the dead threshold and then scans the debugfs file,
> + * elapsed_time_in_ms, of each heartbeat region. If the elapsed time is
> + * greater than the warn threshold, it logs a message in syslog.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +#include <linux/types.h>
> +#include <sys/stat.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <string.h>
> +#include <libgen.h>
> +#include <syslog.h>
> +#include <errno.h>
> +
> +#define SYS_CONFIG_DIR			"/sys/kernel/config"
> +#define O2HB_CLUSTER_DIR		SYS_CONFIG_DIR"/cluster"
> +#define O2HB_HEARTBEAT_DIR		O2HB_CLUSTER_DIR"/%s/heartbeat"
> +#define O2HB_DEAD_THRESHOLD		O2HB_HEARTBEAT_DIR"/dead_threshold"
> +#define O2HB_DEVICE			O2HB_HEARTBEAT_DIR"/%s/dev"
> +
> +#define SYS_DEBUG_DIR			"/sys/kernel/debug"
> +#define O2HB_DEBUG_DIR			SYS_DEBUG_DIR"/o2hb"
> +#define O2HB_ELAPSED_TIME		O2HB_DEBUG_DIR"/%s/elapsed_time_in_ms"
> +
> +#define DEAD_THRESHOLD_IN_MSECS(a)	(((a) - 1) * 2000)
> +#define WARN_THRESHOLD_PERCENT		50
> +
> +#define CONFIG_POLL_IN_SECS		60
> +#define SLOW_POLL_IN_SECS		10
> +#define FAST_POLL_IN_SECS		2
> +
> +char *progname;
> +int interactive;
> +int warn_threshold_percent;
> +int verbose;
> +
> +char *cluster_name;
> +unsigned long dead_threshold_in_ms;
> +unsigned long warn_threshold_in_ms;
> +unsigned long poll_in_secs;
> +
> +static void show_version(void)
> +{
> +	fprintf(stderr, "%s %s\n", progname, VERSION);
> +}
> +
> +static char *do_strchomp(char *str)
> +{
> +	int len = strlen(str);
> +	char *p;
> +
> +	if (!len)
> +		return str;
> +
> +	p = str + len - 1;
> +	while ((len--) && (isspace(*p) || (*p == '\n')))
> +		*p-- = '\0';
> +
> +	return str;
> +}
> +
> +static int get_value(char *path, char *value, int count)
> +{
> +	int fd = -1, ret = -1;
> +	char *p = value;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd > 0)
> +		ret = read(fd, value, count);
> +	if (ret > 0) {
> +		p += ret;
> +		*p = '\0';
> +		ret = 0;
> +	}
> +
> +	if (!ret)
> +		do_strchomp(value);
> +
> +	if (fd > -1)
> +		close(fd);
> +	return ret;
> +}
> +
> +static void get_device_name(char *region, char **device)
> +{
> +	int ret;
> +	char val[255];
> +	char path[PATH_MAX];
> +
> +	sprintf(path, O2HB_DEVICE, cluster_name, region);
> +	ret = get_value(path, val, sizeof(val));
> +	if (ret)
> +		goto bail;
> +	*device = strdup(val);
> +
> +bail:
> +	return ;
> +}
> +
> +static void process_elapsed_time(char *region, unsigned long elapsed)
> +{
> +	int warn = 0;
> +	char *device = NULL;
> +
> +	if (elapsed >= warn_threshold_in_ms)
> +		warn++;
> +
> +	if (!verbose && !warn)
> +		return;
> +
> +	get_device_name(region, &device);
> +
> +	if (verbose)
> +		fprintf(stdout, "Last ping %lu msecs ago on /dev/%s, %s\n",
> +		       elapsed, device, region);
> +
> +	if (warn) {
> +		poll_in_secs = FAST_POLL_IN_SECS;
> +		syslog(LOG_WARNING, "Last ping %lu msecs ago on /dev/%s, %s\n",
> +		       elapsed, device, region);
> +	}
> +
> +	if (device)
> +		free(device);
> +}
> +
> +static int read_elapsed_time(char *region, unsigned long *elapsed)
> +{
> +	int ret;
> +	char val[32];
> +	char path[PATH_MAX];
> +
> +	*elapsed = 0;
> +
> +	sprintf(path, O2HB_ELAPSED_TIME, region);
> +	ret = get_value(path, val, sizeof(val));
> +	if (ret)
> +		goto bail;
> +	*elapsed = strtoul(val, NULL, 0);
> +
> +	ret = 0;
> +
> +bail:
> +	return ret;
> +}
> +
> +static void scan_heartbeat_regions(void)
> +{
> +	int ret = -1;
> +	DIR *dir = NULL;
> +	struct dirent *ent;
> +	char path[PATH_MAX];
> +	unsigned long elapsed;
> +
> +	sprintf(path, O2HB_DEBUG_DIR);
> +
> +	dir = opendir(path);
> +	if (!dir)
> +		return;
> +
> +	do {
> +		ent = readdir(dir);
> +		if (ent && ent->d_type == 4 && strcmp(ent->d_name, ".") &&

    Would 'ent->d_type == DT_DIR' make this clearer than the magic number 4?

> +		    strcmp(ent->d_name, "..")) {
> +			ret = read_elapsed_time(ent->d_name, &elapsed);
> +			if (!ret)
> +				process_elapsed_time(ent->d_name, elapsed);
> +		}
> +	} while (ent);
> +
> +	if (dir)
> +		closedir(dir);
> +}
> +
> +static int populate_thresholds(void)
> +{
> +	int ret;
> +	char val[32];
> +	char path[PATH_MAX];
> +
> +	sprintf(path, O2HB_DEAD_THRESHOLD, cluster_name);
> +	ret = get_value(path, val, sizeof(val));
> +	if (!ret) {
> +		dead_threshold_in_ms =
> +			DEAD_THRESHOLD_IN_MSECS(strtoul(val, NULL, 0));
> +		warn_threshold_in_ms =
> +			(dead_threshold_in_ms * warn_threshold_percent / 100);
> +	}
> +
> +	return ret;
> +}
> +
> +static int populate_cluster(void)
> +{
> +	DIR *dir;
> +	struct dirent *ent;
> +
> +	if (cluster_name) {
> +		free(cluster_name);
> +		cluster_name = NULL;
> +	}
> +
> +	dir = opendir(O2HB_CLUSTER_DIR);
> +	if (!dir)
> +		return -1;
> +
> +	do {
> +		ent = readdir(dir);
> +		if (ent && ent->d_type == 4 && strcmp(ent->d_name, ".") &&
> +		    strcmp(ent->d_name, "..")) {
> +			cluster_name = strdup(ent->d_name);
> +			break;
> +		}
> +	} while (ent);
> +
> +	closedir(dir);
> +
> +	if (cluster_name)
> +		return 0;

    OK, I'm still torn on releasing 'cluster_name' if the monitor
dies from SIGINT or SIGTERM.
Would an explicit signal handler for cleanup really hurt the simplicity?
It may look like overkill,
but it does make the logic clear, and it at least offers us a way to reclaim
resources, if any, at the end
of the world.

> +
> +	return -1;
> +}
> +
> +static int is_cluster_up(void)
> +{
> +	struct stat buf;
> +	int status;
> +	static int warn_count = 0;
> +
> +	status = stat(O2HB_CLUSTER_DIR, &buf);
> +	if (status)
> +		return 0;
> +
> +	status = stat(O2HB_DEBUG_DIR, &buf);
> +	if (status) {
> +		if (!(warn_count++ % 10))
> +			syslog(LOG_WARNING,
> +			       "mount debugfs at /sys/kernel/debug");
> +		return 0;
> +	}
> +
> +	return 1;
> +}
> +
> +static void monitor(void)
> +{
> +	int ret;
> +
> +	while (1) {
> +		if (!is_cluster_up()) {
> +			sleep(CONFIG_POLL_IN_SECS);
> +			continue;
> +		}
> +
> +		ret = populate_cluster();
> +		if (!ret)
> +			ret = populate_thresholds();
> +		if (ret) {
> +			sleep(CONFIG_POLL_IN_SECS);
> +			continue;
> +		}
> +
> +		poll_in_secs = SLOW_POLL_IN_SECS;
> +
> +		scan_heartbeat_regions();
> +
> +		sleep(poll_in_secs);
> +	}
> +}
> +
> +static void usage(void)
> +{
> +	fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
> +	fprintf(stderr, "\t -w, Warn threshold percent (default 50%%)\n");
> +	fprintf(stderr, "\t -i, Interactive\n");
> +	fprintf(stderr, "\t -v, Verbose\n");
> +	fprintf(stderr, "\t -V, Version\n");
> +	exit(1);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int c, ret, version = 0;
> +
> +	/* init globals */
> +	progname = basename(argv[0]);
> +	interactive = 0;
> +	warn_threshold_percent = WARN_THRESHOLD_PERCENT;
> +	verbose = 0;
> +	cluster_name = NULL;
> +
> +	while (1) {
> +		c = getopt(argc, argv, "w:i?hvV");
> +		if (c == -1)
> +			break;
> +		switch (c) {
> +		case 'i':
> +			interactive = 1;
> +			break;
> +		case 'v':
> +			++verbose;
> +			break;
> +		case 'w':
> +			warn_threshold_percent = strtoul(optarg, NULL, 0);
> +			if (warn_threshold_percent < 1 ||
> +			    warn_threshold_percent > 99)
> +				warn_threshold_percent = WARN_THRESHOLD_PERCENT;
> +			break;
> +		case 'V':
> +			version = 1;
> +			break;
> +		case '?':
> +		case 'h':
> +		default:
> +			usage();
> +			break;
> +		}
> +	}
> +
> +	if (version)
> +		show_version();
> +
> +	if (!interactive) {
> +		ret = daemon(0, verbose);
> +		if (ret)
> +			fprintf(stderr, "Unable to daemonize, %s\n",
> +				strerror(errno));
> +	}
> +
> +	openlog(progname, LOG_CONS|LOG_NDELAY, LOG_DAEMON);
> +	monitor();
> +	closelog();
> +
> +	return 0;
> +}
> diff --git a/vendor/common/ocfs2-tools.spec-generic.in b/vendor/common/ocfs2-tools.spec-generic.in
> index 0192c48..9a4c4dd 100644
> --- a/vendor/common/ocfs2-tools.spec-generic.in
> +++ b/vendor/common/ocfs2-tools.spec-generic.in
> @@ -120,6 +120,7 @@ fi
>  /sbin/o2image
>  /sbin/ocfs2_hb_ctl
>  /usr/bin/o2info
> +/usr/sbin/o2hbmonitor
>  /etc/init.d/o2cb
>  /etc/init.d/ocfs2
>  %config(noreplace) /etc/sysconfig/o2cb




More information about the Ocfs2-tools-devel mailing list