[Ocfs2-tools-devel] [PATCH 2/2] o2hbmonitor: Limit number of active instances

Sunil Mushran sunil.mushran at oracle.com
Wed Dec 8 12:27:18 PST 2010


Tristan,

Did you get a chance to play with this? I am wondering if I can get
any confirmation on whether this works for them or not. It is working
on my box: kill -9 is trapped and the named semaphore is cleaned up.
I am wondering whether anyone else is seeing the same behavior.

Sunil

On 12/02/2010 07:15 PM, Tristan Ye wrote:
> Sunil Mushran wrote:
>> Patch attempts to disallow multiple instances of o2hbmonitor
>> running at the same time.
>>
>> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
>> ---
>>  o2monitor/Makefile      |    2 +-
>>  o2monitor/o2hbmonitor.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 86 insertions(+), 1 deletions(-)
>>
>> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
>> index 961eafa..6392b96 100644
>> --- a/o2monitor/Makefile
>> +++ b/o2monitor/Makefile
>> @@ -21,6 +21,6 @@ OBJS = $(subst .c,.o,$(CFILES))
>>  DIST_FILES = $(CFILES) $(HFILES)
>>
>>  o2hbmonitor: $(OBJS)
>> -    $(LINK)
>> +    $(LINK) -lrt
>>
>>  include $(TOPDIR)/Postamble.make
>> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
>> index 58e9280..44acb3a 100644
>> --- a/o2monitor/o2hbmonitor.c
>> +++ b/o2monitor/o2hbmonitor.c
>> @@ -43,6 +43,9 @@
>>  #include <libgen.h>
>>  #include <syslog.h>
>>  #include <errno.h>
>> +#include <sys/ipc.h>
>> +#include <semaphore.h>
>> +#include <signal.h>
>>
>>  #define SYS_CONFIG_DIR            "/sys/kernel/config"
>>  #define O2HB_CLUSTER_DIR        SYS_CONFIG_DIR"/cluster"
>> @@ -71,6 +74,22 @@ unsigned long dead_threshold_in_ms;
>>  unsigned long warn_threshold_in_ms;
>>  unsigned long poll_in_secs;
>>
>> +sem_t *sem;
>> +char sem_name[NAME_MAX - 4];
>> +int sem_taken;
>> +
>> +static void handler(int sig)
>> +{
>> +    if (sem_taken) {
>> +        sem_unlink(sem_name);
>> +        sem_post(sem);
>> +        sem_close(sem);
>> +    }
>> +
>> +    syslog(LOG_INFO, "Exiting\n");
>> +    exit(0);
>> +}
>> +
>>  static void show_version(void)
>>  {
>>      fprintf(stderr, "%s %s\n", progname, VERSION);
>> @@ -278,6 +297,8 @@ static void monitor(void)
>>  {
>>      int ret;
>>
>> +    syslog(LOG_INFO, "Starting\n");
>> +
>>      while (1) {
>>          if (!is_cluster_up()) {
>>              sleep(CONFIG_POLL_IN_SECS);
>> @@ -300,6 +321,45 @@ static void monitor(void)
>>      }
>>  }
>>
>> +/* Returns -1 if already running, 0 if not, 1 if unknown */
>> +static int is_already_running(void)
>> +{
>> +    int ret;
>> +
>> +    sem = sem_open(sem_name, O_CREAT, 0644, 1);
>> +    if (sem <= 0) {
>> +        fprintf(stderr, "%s\n", strerror(errno));
>> +        return 1;
>> +    }
>> +
>> +    ret = sem_trywait(sem);
>> +    if (ret) {
>> +        if (errno == EAGAIN)
>> +            return -1;
>> +        return 1;
>> +    }
>> +
>> +    sem_taken = 1;
>> +
>> +    return 0;
>> +}
>> +
>> +static int setup_signals(void)
>> +{
>> +    int ret = 0;
>> +
>> +    struct sigaction act = { .sa_handler = handler };
>> +
>> +    sigemptyset(&act.sa_mask);
>> +    ret = sigaction(SIGTERM, &act, NULL);
>> +    ret += sigaction(SIGINT, &act, NULL);
>> +    ret += sigaction(SIGHUP, &act, NULL);
>> +    ret += sigaction(SIGQUIT, &act, NULL);
>> +    ret += sigaction(SIGSEGV, &act, NULL);
>
>    Just wondering how this would behave against 'SIGKILL'. In that case, we won't be able to clean up the semaphore,
>
> and the next run of the instance may still get an error such as 'sorry, we've already got one instance running...'
>
>> +
>> +    return ret;
>> +}
>> +
>>  static void usage(void)
>>  {
>>      fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
>> @@ -320,6 +380,9 @@ int main(int argc, char **argv)
>>      warn_threshold_percent = WARN_THRESHOLD_PERCENT;
>>      verbose = 0;
>>      cluster_name = NULL;
>> +    sem = NULL;
>> +    sem_taken = 0;
>> +    snprintf(sem_name, sizeof(sem_name), "/%s", progname);
>>
>>      while (1) {
>>          c = getopt(argc, argv, "w:i?hvV");
>> @@ -352,6 +415,27 @@ int main(int argc, char **argv)
>>      if (version)
>>          show_version();
>>
>> +    ret = setup_signals();
>> +    if (ret) {
>> +        fprintf(stderr, "Unable to set up signal handlers. %s. "
>> +            "Aborting.\n", strerror(errno));
>> +        goto bail;
>> +    }
>> +
>> +    ret = is_already_running();
>> +    switch (ret) {
>> +    case -1:
>> +        fprintf(stderr, "Another instance of %s is already running. "
>> +            "Aborting.\n", progname);
>> +        goto bail;
>> +    case 1:
>> +        fprintf(stderr, "Unable to determine if %s is already "
>> +            "running. Starting a new instance.\n", progname);
>> +    case 0:
>> +    default:
>> +        break;
>> +    }
>> +
>>      if (!interactive) {
>>          ret = daemon(0, verbose);
>>          if (ret)
>> @@ -363,5 +447,6 @@ int main(int argc, char **argv)
>>      monitor();
>>      closelog();
>>
>> +bail:
>>      return 0;
>>  }
>




More information about the Ocfs2-tools-devel mailing list