[Ocfs2-tools-devel] [PATCH 2/2] o2hbmonitor: Limit number of active instances
Sunil Mushran
sunil.mushran at oracle.com
Wed Dec 8 12:27:18 PST 2010
Tristan,
Did you get a chance to play with this? I am wondering if I can get
any confirmation whether this works for them or not. It is working
on my box: kill -9 is trapped and the named semaphore is cleaned up.
I am wondering whether anyone else is seeing the same.
Sunil
On 12/02/2010 07:15 PM, Tristan Ye wrote:
> Sunil Mushran wrote:
>> Patch attempts to disallow multiple instances of o2hbmonitor
>> running at the same time.
>>
>> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
>> ---
>> o2monitor/Makefile | 2 +-
>> o2monitor/o2hbmonitor.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 86 insertions(+), 1 deletions(-)
>>
>> diff --git a/o2monitor/Makefile b/o2monitor/Makefile
>> index 961eafa..6392b96 100644
>> --- a/o2monitor/Makefile
>> +++ b/o2monitor/Makefile
>> @@ -21,6 +21,6 @@ OBJS = $(subst .c,.o,$(CFILES))
>> DIST_FILES = $(CFILES) $(HFILES)
>>
>> o2hbmonitor: $(OBJS)
>> - $(LINK)
>> + $(LINK) -lrt
>>
>> include $(TOPDIR)/Postamble.make
>> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
>> index 58e9280..44acb3a 100644
>> --- a/o2monitor/o2hbmonitor.c
>> +++ b/o2monitor/o2hbmonitor.c
>> @@ -43,6 +43,9 @@
>> #include <libgen.h>
>> #include <syslog.h>
>> #include <errno.h>
>> +#include <sys/ipc.h>
>> +#include <semaphore.h>
>> +#include <signal.h>
>>
>> #define SYS_CONFIG_DIR "/sys/kernel/config"
>> #define O2HB_CLUSTER_DIR SYS_CONFIG_DIR"/cluster"
>> @@ -71,6 +74,22 @@ unsigned long dead_threshold_in_ms;
>> unsigned long warn_threshold_in_ms;
>> unsigned long poll_in_secs;
>>
>> +sem_t *sem;
>> +char sem_name[NAME_MAX - 4];
>> +int sem_taken;
>> +
>> +static void handler(int sig)
>> +{
>> + if (sem_taken) {
>> + sem_unlink(sem_name);
>> + sem_post(sem);
>> + sem_close(sem);
>> + }
>> +
>> + syslog(LOG_INFO, "Exiting\n");
>> + exit(0);
>> +}
>> +
>> static void show_version(void)
>> {
>> fprintf(stderr, "%s %s\n", progname, VERSION);
>> @@ -278,6 +297,8 @@ static void monitor(void)
>> {
>> int ret;
>>
>> + syslog(LOG_INFO, "Starting\n");
>> +
>> while (1) {
>> if (!is_cluster_up()) {
>> sleep(CONFIG_POLL_IN_SECS);
>> @@ -300,6 +321,45 @@ static void monitor(void)
>> }
>> }
>>
>> +/* Returns -1 if already running, 0 if not, 1 if unknown */
>> +static int is_already_running(void)
>> +{
>> + int ret;
>> +
>> + sem = sem_open(sem_name, O_CREAT, 0644, 1);
>> + if (sem <= 0) {
>> + fprintf(stderr, "%s\n", strerror(errno));
>> + return 1;
>> + }
>> +
>> + ret = sem_trywait(sem);
>> + if (ret) {
>> + if (errno == EAGAIN)
>> + return -1;
>> + return 1;
>> + }
>> +
>> + sem_taken = 1;
>> +
>> + return 0;
>> +}
>> +
>> +static int setup_signals(void)
>> +{
>> + int ret = 0;
>> +
>> + struct sigaction act = { .sa_handler = handler };
>> +
>> + sigemptyset(&act.sa_mask);
>> + ret = sigaction(SIGTERM, &act, NULL);
>> + ret += sigaction(SIGINT, &act, NULL);
>> + ret += sigaction(SIGHUP, &act, NULL);
>> + ret += sigaction(SIGQUIT, &act, NULL);
>> + ret += sigaction(SIGSEGV, &act, NULL);
>
> Just wondering how it would behave against 'SIGKILL'. In that case, we won't be able to clean up the semaphore,
>
> and the next run of an instance may still get an error such as: 'sorry, we've already got one instance running...'
>
>> +
>> + return ret;
>> +}
>> +
>> static void usage(void)
>> {
>> fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
>> @@ -320,6 +380,9 @@ int main(int argc, char **argv)
>> warn_threshold_percent = WARN_THRESHOLD_PERCENT;
>> verbose = 0;
>> cluster_name = NULL;
>> + sem = NULL;
>> + sem_taken = 0;
>> + snprintf(sem_name, sizeof(sem_name), "/%s", progname);
>>
>> while (1) {
>> c = getopt(argc, argv, "w:i?hvV");
>> @@ -352,6 +415,27 @@ int main(int argc, char **argv)
>> if (version)
>> show_version();
>>
>> + ret = setup_signals();
>> + if (ret) {
>> + fprintf(stderr, "Unable to set up signal handlers. %s. "
>> + "Aborting.\n", strerror(errno));
>> + goto bail;
>> + }
>> +
>> + ret = is_already_running();
>> + switch (ret) {
>> + case -1:
>> + fprintf(stderr, "Another instance of %s is already running. "
>> + "Aborting.\n", progname);
>> + goto bail;
>> + case 1:
>> + fprintf(stderr, "Unable to determine if %s is already "
>> + "running. Starting a new instance.\n", progname);
>> + case 0:
>> + default:
>> + break;
>> + }
>> +
>> if (!interactive) {
>> ret = daemon(0, verbose);
>> if (ret)
>> @@ -363,5 +447,6 @@ int main(int argc, char **argv)
>> monitor();
>> closelog();
>>
>> +bail:
>> return 0;
>> }
>
More information about the Ocfs2-tools-devel
mailing list