[Ocfs2-tools-devel] [PATCH 2/2] o2hbmonitor: Limit number of active instances
Tristan Ye
tristan.ye at oracle.com
Wed Dec 8 17:25:12 PST 2010
Sunil Mushran wrote:
> Tristan,
>
> Did you get a chance to play with this? I am wondering if I can get
> any confirmation whether this works for them or not. It is working
> on my box. kill -9 is trapped and the named semaphore is cleaned up.
> Wondering if someone else is seeing the same or not.
Sure,
Let me taste your dishes;-)
>
> Sunil
>
> On 12/02/2010 07:15 PM, Tristan Ye wrote:
>> Sunil Mushran wrote:
>>> Patch attempts to disallow multiple instances of o2hbmonitor
>>> running at the same time.
>>>
>>> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
>>> ---
>>> o2monitor/Makefile | 2 +-
>>> o2monitor/o2hbmonitor.c | 85
>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 86 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
>>> index 961eafa..6392b96 100644
>>> --- a/o2monitor/Makefile
>>> +++ b/o2monitor/Makefile
>>> @@ -21,6 +21,6 @@ OBJS = $(subst .c,.o,$(CFILES))
>>> DIST_FILES = $(CFILES) $(HFILES)
>>>
>>> o2hbmonitor: $(OBJS)
>>> - $(LINK)
>>> + $(LINK) -lrt
>>>
>>> include $(TOPDIR)/Postamble.make
>>> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
>>> index 58e9280..44acb3a 100644
>>> --- a/o2monitor/o2hbmonitor.c
>>> +++ b/o2monitor/o2hbmonitor.c
>>> @@ -43,6 +43,9 @@
>>> #include <libgen.h>
>>> #include <syslog.h>
>>> #include <errno.h>
>>> +#include <sys/ipc.h>
>>> +#include <semaphore.h>
>>> +#include <signal.h>
>>>
>>> #define SYS_CONFIG_DIR "/sys/kernel/config"
>>> #define O2HB_CLUSTER_DIR SYS_CONFIG_DIR"/cluster"
>>> @@ -71,6 +74,22 @@ unsigned long dead_threshold_in_ms;
>>> unsigned long warn_threshold_in_ms;
>>> unsigned long poll_in_secs;
>>>
>>> +sem_t *sem;
>>> +char sem_name[NAME_MAX - 4];
>>> +int sem_taken;
>>> +
>>> +static void handler(int sig)
>>> +{
>>> + if (sem_taken) {
>>> + sem_unlink(sem_name);
>>> + sem_post(sem);
>>> + sem_close(sem);
>>> + }
>>> +
>>> + syslog(LOG_INFO, "Exiting\n");
>>> + exit(0);
>>> +}
>>> +
>>> static void show_version(void)
>>> {
>>> fprintf(stderr, "%s %s\n", progname, VERSION);
>>> @@ -278,6 +297,8 @@ static void monitor(void)
>>> {
>>> int ret;
>>>
>>> + syslog(LOG_INFO, "Starting\n");
>>> +
>>> while (1) {
>>> if (!is_cluster_up()) {
>>> sleep(CONFIG_POLL_IN_SECS);
>>> @@ -300,6 +321,45 @@ static void monitor(void)
>>> }
>>> }
>>>
>>> +/* Returns -1 if already running, 0 if not, 1 if unknown */
>>> +static int is_already_running(void)
>>> +{
>>> + int ret;
>>> +
>>> + sem = sem_open(sem_name, O_CREAT, 0644, 1);
>>> + if (sem <= 0) {
>>> + fprintf(stderr, "%s\n", strerror(errno));
>>> + return 1;
>>> + }
>>> +
>>> + ret = sem_trywait(sem);
>>> + if (ret) {
>>> + if (errno == EAGAIN)
>>> + return -1;
>>> + return 1;
>>> + }
>>> +
>>> + sem_taken = 1;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int setup_signals(void)
>>> +{
>>> + int ret = 0;
>>> +
>>> + struct sigaction act = { .sa_handler = handler };
>>> +
>>> + sigemptyset(&act.sa_mask);
>>> + ret = sigaction(SIGTERM, &act, NULL);
>>> + ret += sigaction(SIGINT, &act, NULL);
>>> + ret += sigaction(SIGHUP, &act, NULL);
>>> + ret += sigaction(SIGQUIT, &act, NULL);
>>> + ret += sigaction(SIGSEGV, &act, NULL);
>>
>> Just wondering how this would behave against 'SIGKILL'. In that
>> case, we won't be able to clean up the semaphore,
>>
>> and the next instance to run may still get an error like: 'sorry,
>> we've already got one instance running...'
>>
>>> +
>>> + return ret;
>>> +}
>>> +
>>> static void usage(void)
>>> {
>>> fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
>>> @@ -320,6 +380,9 @@ int main(int argc, char **argv)
>>> warn_threshold_percent = WARN_THRESHOLD_PERCENT;
>>> verbose = 0;
>>> cluster_name = NULL;
>>> + sem = NULL;
>>> + sem_taken = 0;
>>> + snprintf(sem_name, sizeof(sem_name), "/%s", progname);
>>>
>>> while (1) {
>>> c = getopt(argc, argv, "w:i?hvV");
>>> @@ -352,6 +415,27 @@ int main(int argc, char **argv)
>>> if (version)
>>> show_version();
>>>
>>> + ret = setup_signals();
>>> + if (ret) {
>>> + fprintf(stderr, "Unable to set up signal handlers. %s. "
>>> + "Aborting.\n", strerror(errno));
>>> + goto bail;
>>> + }
>>> +
>>> + ret = is_already_running();
>>> + switch (ret) {
>>> + case -1:
>>> + fprintf(stderr, "Another instance of %s is already running. "
>>> + "Aborting.\n", progname);
>>> + goto bail;
>>> + case 1:
>>> + fprintf(stderr, "Unable to determine if %s is already "
>>> + "running. Starting a new instance.\n", progname);
>>> + case 0:
>>> + default:
>>> + break;
>>> + }
>>> +
>>> if (!interactive) {
>>> ret = daemon(0, verbose);
>>> if (ret)
>>> @@ -363,5 +447,6 @@ int main(int argc, char **argv)
>>> monitor();
>>> closelog();
>>>
>>> +bail:
>>> return 0;
>>> }
>>
>
More information about the Ocfs2-tools-devel
mailing list