[Ocfs2-tools-devel] [PATCH 2/2] o2hbmonitor: Limit number of active instances

Tristan Ye tristan.ye at oracle.com
Wed Dec 8 17:25:12 PST 2010


Sunil Mushran wrote:
> Tristan,
>
> Did you get a chance to play with this? I am wondering if I can get
> any confirmation whether this works for them or not. It is working
> on my box. kill -9 is trapped and the named semaphore is cleaned up.
> Wondering if someone else is seeing the same or not.

Sure,

    Let me taste your dishes;-)



>
> Sunil
>
> On 12/02/2010 07:15 PM, Tristan Ye wrote:
>> Sunil Mushran wrote:
>>> Patch attempts to disallow multiple instances of o2hbmonitor
>>> running at the same time.
>>>
>>> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
>>> ---
>>>  o2monitor/Makefile      |    2 +-
>>>  o2monitor/o2hbmonitor.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++
>>>  2 files changed, 86 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
>>> index 961eafa..6392b96 100644
>>> --- a/o2monitor/Makefile
>>> +++ b/o2monitor/Makefile
>>> @@ -21,6 +21,6 @@ OBJS = $(subst .c,.o,$(CFILES))
>>>  DIST_FILES = $(CFILES) $(HFILES)
>>>
>>>  o2hbmonitor: $(OBJS)
>>> -    $(LINK)
>>> +    $(LINK) -lrt
>>>
>>>  include $(TOPDIR)/Postamble.make
>>> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
>>> index 58e9280..44acb3a 100644
>>> --- a/o2monitor/o2hbmonitor.c
>>> +++ b/o2monitor/o2hbmonitor.c
>>> @@ -43,6 +43,9 @@
>>>  #include <libgen.h>
>>>  #include <syslog.h>
>>>  #include <errno.h>
>>> +#include <sys/ipc.h>
>>> +#include <semaphore.h>
>>> +#include <signal.h>
>>>
>>>  #define SYS_CONFIG_DIR            "/sys/kernel/config"
>>>  #define O2HB_CLUSTER_DIR        SYS_CONFIG_DIR"/cluster"
>>> @@ -71,6 +74,22 @@ unsigned long dead_threshold_in_ms;
>>>  unsigned long warn_threshold_in_ms;
>>>  unsigned long poll_in_secs;
>>>
>>> +sem_t *sem;
>>> +char sem_name[NAME_MAX - 4];
>>> +int sem_taken;
>>> +
>>> +static void handler(int sig)
>>> +{
>>> +    if (sem_taken) {
>>> +        sem_unlink(sem_name);
>>> +        sem_post(sem);
>>> +        sem_close(sem);
>>> +    }
>>> +
>>> +    syslog(LOG_INFO, "Exiting\n");
>>> +    exit(0);
>>> +}
>>> +
>>>  static void show_version(void)
>>>  {
>>>      fprintf(stderr, "%s %s\n", progname, VERSION);
>>> @@ -278,6 +297,8 @@ static void monitor(void)
>>>  {
>>>      int ret;
>>>
>>> +    syslog(LOG_INFO, "Starting\n");
>>> +
>>>      while (1) {
>>>          if (!is_cluster_up()) {
>>>              sleep(CONFIG_POLL_IN_SECS);
>>> @@ -300,6 +321,45 @@ static void monitor(void)
>>>      }
>>>  }
>>>
>>> +/* Returns -1 if already running, 0 if not, 1 if unknown */
>>> +static int is_already_running(void)
>>> +{
>>> +    int ret;
>>> +
>>> +    sem = sem_open(sem_name, O_CREAT, 0644, 1);
>>> +    if (sem <= 0) {
>>> +        fprintf(stderr, "%s\n", strerror(errno));
>>> +        return 1;
>>> +    }
>>> +
>>> +    ret = sem_trywait(sem);
>>> +    if (ret) {
>>> +        if (errno == EAGAIN)
>>> +            return -1;
>>> +        return 1;
>>> +    }
>>> +
>>> +    sem_taken = 1;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int setup_signals(void)
>>> +{
>>> +    int ret = 0;
>>> +
>>> +    struct sigaction act = { .sa_handler = handler };
>>> +
>>> +    sigemptyset(&act.sa_mask);
>>> +    ret = sigaction(SIGTERM, &act, NULL);
>>> +    ret += sigaction(SIGINT, &act, NULL);
>>> +    ret += sigaction(SIGHUP, &act, NULL);
>>> +    ret += sigaction(SIGQUIT, &act, NULL);
>>> +    ret += sigaction(SIGSEGV, &act, NULL);
>>
>>    Just wondering how this would behave against 'SIGKILL'. In that
>> case, we won't be able to clean up the semaphore,
>>
>> and the next run of an instance may still get an error such as: 'sorry,
>> we've already got one instance running...'
>>
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>  static void usage(void)
>>>  {
>>>      fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
>>> @@ -320,6 +380,9 @@ int main(int argc, char **argv)
>>>      warn_threshold_percent = WARN_THRESHOLD_PERCENT;
>>>      verbose = 0;
>>>      cluster_name = NULL;
>>> +    sem = NULL;
>>> +    sem_taken = 0;
>>> +    snprintf(sem_name, sizeof(sem_name), "/%s", progname);
>>>
>>>      while (1) {
>>>          c = getopt(argc, argv, "w:i?hvV");
>>> @@ -352,6 +415,27 @@ int main(int argc, char **argv)
>>>      if (version)
>>>          show_version();
>>>
>>> +    ret = setup_signals();
>>> +    if (ret) {
>>> +        fprintf(stderr, "Unable to set up signal handlers. %s. "
>>> +            "Aborting.\n", strerror(errno));
>>> +        goto bail;
>>> +    }
>>> +
>>> +    ret = is_already_running();
>>> +    switch (ret) {
>>> +    case -1:
>>> +        fprintf(stderr, "Another instance of %s is already running. "
>>> +            "Aborting.\n", progname);
>>> +        goto bail;
>>> +    case 1:
>>> +        fprintf(stderr, "Unable to determine if %s is already "
>>> +            "running. Starting a new instance.\n", progname);
>>> +    case 0:
>>> +    default:
>>> +        break;
>>> +    }
>>> +
>>>      if (!interactive) {
>>>          ret = daemon(0, verbose);
>>>          if (ret)
>>> @@ -363,5 +447,6 @@ int main(int argc, char **argv)
>>>      monitor();
>>>      closelog();
>>>
>>> +bail:
>>>      return 0;
>>>  }
>>
>




More information about the Ocfs2-tools-devel mailing list