[Ocfs2-tools-devel] [PATCH 2/2] o2hbmonitor: Limit number of active instances

Tristan Ye tristan.ye at oracle.com
Thu Dec 2 19:15:18 PST 2010


Sunil Mushran wrote:
> Patch attempts to disallow multiple instances of o2hbmonitor
> running at the same time.
>
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
>  o2monitor/Makefile      |    2 +-
>  o2monitor/o2hbmonitor.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 86 insertions(+), 1 deletions(-)
>
> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
> index 961eafa..6392b96 100644
> --- a/o2monitor/Makefile
> +++ b/o2monitor/Makefile
> @@ -21,6 +21,6 @@ OBJS = $(subst .c,.o,$(CFILES))
>  DIST_FILES = $(CFILES) $(HFILES)
>  
>  o2hbmonitor: $(OBJS)
> -	$(LINK)
> +	$(LINK) -lrt
>  
>  include $(TOPDIR)/Postamble.make
> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
> index 58e9280..44acb3a 100644
> --- a/o2monitor/o2hbmonitor.c
> +++ b/o2monitor/o2hbmonitor.c
> @@ -43,6 +43,9 @@
>  #include <libgen.h>
>  #include <syslog.h>
>  #include <errno.h>
> +#include <sys/ipc.h>
> +#include <semaphore.h>
> +#include <signal.h>
>  
>  #define SYS_CONFIG_DIR			"/sys/kernel/config"
>  #define O2HB_CLUSTER_DIR		SYS_CONFIG_DIR"/cluster"
> @@ -71,6 +74,22 @@ unsigned long dead_threshold_in_ms;
>  unsigned long warn_threshold_in_ms;
>  unsigned long poll_in_secs;
>  
> +sem_t *sem;
> +char sem_name[NAME_MAX - 4];
> +int sem_taken;
> +
> +static void handler(int sig)
> +{
> +	if (sem_taken) {
> +		sem_unlink(sem_name);
> +		sem_post(sem);
> +		sem_close(sem);
> +	}
> +
> +	syslog(LOG_INFO, "Exiting\n");
> +	exit(0);
> +}
> +
>  static void show_version(void)
>  {
>  	fprintf(stderr, "%s %s\n", progname, VERSION);
> @@ -278,6 +297,8 @@ static void monitor(void)
>  {
>  	int ret;
>  
> +	syslog(LOG_INFO, "Starting\n");
> +
>  	while (1) {
>  		if (!is_cluster_up()) {
>  			sleep(CONFIG_POLL_IN_SECS);
> @@ -300,6 +321,45 @@ static void monitor(void)
>  	}
>  }
>  
> +/* Returns -1 if already running, 0 if not, 1 if unknown */
> +static int is_already_running(void)
> +{
> +	int ret;
> +
> +	sem = sem_open(sem_name, O_CREAT, 0644, 1);
> +	if (sem <= 0) {
> +		fprintf(stderr, "%s\n", strerror(errno));
> +		return 1;
> +	}
> +
> +	ret = sem_trywait(sem);
> +	if (ret) {
> +		if (errno == EAGAIN)
> +			return -1;
> +		return 1;
> +	}
> +
> +	sem_taken = 1;
> +
> +	return 0;
> +}
> +
> +static int setup_signals(void)
> +{
> +	int ret = 0;
> +
> +	struct sigaction act = { .sa_handler = handler };
> +
> +	sigemptyset(&act.sa_mask);
> +	ret = sigaction(SIGTERM, &act, NULL);
> +	ret += sigaction(SIGINT, &act, NULL);
> +	ret += sigaction(SIGHUP, &act, NULL);
> +	ret += sigaction(SIGQUIT, &act, NULL);
> +	ret += sigaction(SIGSEGV, &act, NULL);

    Just wondering how this would behave on 'SIGKILL'. In that case the
handler never runs, so we won't be able to clean up the semaphore,

and the next run of an instance may still get an error such as 'sorry,
we've already got one instance running...'

> +
> +	return ret;
> +}
> +
>  static void usage(void)
>  {
>  	fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
> @@ -320,6 +380,9 @@ int main(int argc, char **argv)
>  	warn_threshold_percent = WARN_THRESHOLD_PERCENT;
>  	verbose = 0;
>  	cluster_name = NULL;
> +	sem = NULL;
> +	sem_taken = 0;
> +	snprintf(sem_name, sizeof(sem_name), "/%s", progname);
>  
>  	while (1) {
>  		c = getopt(argc, argv, "w:i?hvV");
> @@ -352,6 +415,27 @@ int main(int argc, char **argv)
>  	if (version)
>  		show_version();
>  
> +	ret = setup_signals();
> +	if (ret) {
> +		fprintf(stderr, "Unable to set up signal handlers. %s. "
> +			"Aborting.\n", strerror(errno));
> +		goto bail;
> +	}
> +
> +	ret = is_already_running();
> +	switch (ret) {
> +	case -1:
> +		fprintf(stderr, "Another instance of %s is already running. "
> +			"Aborting.\n", progname);
> +		goto bail;
> +	case 1:
> +		fprintf(stderr, "Unable to determine if %s is already "
> +			"running. Starting a new instance.\n", progname);
> +	case 0:
> +	default:
> +		break;
> +	}
> +
>  	if (!interactive) {
>  		ret = daemon(0, verbose);
>  		if (ret)
> @@ -363,5 +447,6 @@ int main(int argc, char **argv)
>  	monitor();
>  	closelog();
>  
> +bail:
>  	return 0;
>  }




More information about the Ocfs2-tools-devel mailing list