[Ocfs2-tools-devel] [PATCH] o2hbmonitor: add semaphore

Sunil Mushran sunil.mushran at oracle.com
Mon Jan 3 13:41:07 PST 2011


comments inlined.

On 01/03/2011 10:48 AM, Srinivas Eeda wrote:
> add semaphore to limit to one o2hbmonitor per node
>
> Signed-off-by: Srinivas Eeda<srinivas.eeda at oracle.com>
> ---
>   o2monitor/o2hbmonitor.c |   63 +++++++++++++++++++++++++++++++++++++++++++++++
>   1 files changed, 63 insertions(+), 0 deletions(-)
>
> diff --git a/o2monitor/o2hbmonitor.c b/o2monitor/o2hbmonitor.c
> index 5bb5590..4b83126 100644
> --- a/o2monitor/o2hbmonitor.c
> +++ b/o2monitor/o2hbmonitor.c
> @@ -43,6 +43,8 @@
>   #include<libgen.h>
>   #include<syslog.h>
>   #include<errno.h>
> +#include<sys/ipc.h>
> +#include<sys/sem.h>
>
>   #define SYS_CONFIG_DIR			"/sys/kernel/config"
>   #define O2HB_CLUSTER_DIR		SYS_CONFIG_DIR"/cluster"
> @@ -61,6 +63,8 @@
>   #define SLOW_POLL_IN_SECS		10
>   #define FAST_POLL_IN_SECS		2
>
> +#define O2HB_SEM_MAGIC_KEY		0x6F326862
> +
>   char *progname;
>   int interactive;
>   int warn_threshold_percent;
> @@ -300,6 +304,53 @@ static void monitor(void)
>   	}
>   }
>
> +static int islocked(void)
> +{
> +	int semid;
> +	struct sembuf trylock[1] = {
> +		{.sem_num = 0, .sem_op = 0, .sem_flg = SEM_UNDO|IPC_NOWAIT},
> +	};
> +
> +	semid = semget(O2HB_SEM_MAGIC_KEY, 1, 0);
> +	if (semid<  0)
> +		return 0;
> +	if (semop(semid, trylock, 1)<  0)
> +		return 1;
> +	return 0;
> +}
> +
> +static int getlock(void)
> +{
> +	int ret, semid, vals[1] = { 0 };
> +	struct sembuf trylock[2] = {
> +		{.sem_num = 0, .sem_op = 0, .sem_flg = SEM_UNDO|IPC_NOWAIT},
> +		{.sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO|IPC_NOWAIT},
> +	};
> +
> +	semid = semget(O2HB_SEM_MAGIC_KEY, 1, 0);
> +	if (semid<  0) {
> +		semid = semget(O2HB_SEM_MAGIC_KEY, 1,
> +			       IPC_CREAT|IPC_EXCL|S_IRUSR);
> +		if (semid<  0)
> +			goto out;
> +		semctl(semid, 0, SETALL, vals);
> +		if (semop(semid, trylock, 2)<  0)
> +			goto out;
> +		else
> +			return 0;
> +	}
> +	if (semop(semid, trylock, 2)<  0)
> +		goto out;
> +	return 0;
> +out:
> +	ret = errno;
> +	if (ret != EAGAIN) {
> +		fprintf(stderr, "o2hbmonitor failed to start\n");
> +		syslog(LOG_WARNING, "o2hbmonitor failed to start\n");

There are three conditions.
1. Get the lock successfully.
2. Unable to get the lock.
3. Error obtaining the lock.

We should continue to monitor in cases 1 and 3. We abort only in 2.

Also, use progname in the message and only syslog the message.

> +	}
> +	return ret;
> +}
> +
>   static void usage(void)
>   {
>   	fprintf(stderr, "usage: %s [-w percent] -[ivV]\n", progname);
> @@ -352,6 +403,12 @@ int main(int argc, char **argv)
>   	if (version)
>   		show_version();
>
> +	if (islocked()) {
> +		fprintf(stderr, "o2monitor already running\n");

+               fprintf(stderr, "Another instance of %s is already running. "
+                       "Aborting.\n", progname);


> +		return 1;
> +	}
> +	fprintf(stdout, "starting o2monitor daemon\n");

Starting daemon message is not useful because it can still abort.

> +
>   	if (!interactive) {
>   		ret = daemon(0, verbose);
>   		if (ret)
> @@ -360,6 +417,12 @@ int main(int argc, char **argv)
>   	}
>
>   	openlog(progname, LOG_CONS|LOG_NDELAY, LOG_DAEMON);
> +	ret = getlock();
> +	if (ret) {
> +		closelog();
> +		return ret;
> +	}
> +

Add this at the top of monitor().
+       syslog(LOG_INFO, "Starting\n");

>   	monitor();
>   	closelog();
>




More information about the Ocfs2-tools-devel mailing list