[Ocfs2-tools-devel] [PATCH 9/9] ocfs2_controld: Notify dlm_controld when we have a node go down.
Joel Becker
joel.becker at oracle.com
Wed Aug 13 17:15:39 PDT 2008
ocfs2 must start recovery before the dlm completes its recovery.
Otherwise, the dlm might give out locks that belonged to the dead node
before ocfs2 has had a chance to replay its journal.
libdlmcontrol implements this ordering. When ocfs2_controld sees a node
go down, it tells dlm_controld via dlmc_fs_notified(). dlm_controld
responds with success if it also has seen the node go down. It
responds nonzero otherwise. dlm_controld knows to wait until it has
seen the notification from ocfs2_controld.
The ocfs2_controld implementation is simple. It first notifies ocfs2,
then sends the notification to dlm_controld. ocfs2_controld will keep
resending it while dlm_controld responds in the negative. When
dlm_controld finally gets the node down event, ocfs2_controld will stop
sending. We keep such a simple busy-looping scheme because the
back-and-forth traffic actually means context switches and process
sleeping.
Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
ocfs2_controld/dlmcontrol.c | 122 ++++++++++++++++++++++++++++++++++++++-
ocfs2_controld/mount.c | 2 +
ocfs2_controld/ocfs2_controld.h | 1 +
3 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/ocfs2_controld/dlmcontrol.c b/ocfs2_controld/dlmcontrol.c
index 889f138..bc49853 100644
--- a/ocfs2_controld/dlmcontrol.c
+++ b/ocfs2_controld/dlmcontrol.c
@@ -34,10 +34,17 @@
struct dlmcontrol_fs {
struct list_head df_list;
char df_name[DLM_LOCKSPACE_LEN+1];
+ struct list_head df_notifications;
void (*df_result)(int status, void *user_data);
void *df_user_data;
};
+/* Keeps track of pending node-down notifications */
+struct dlmcontrol_notification {
+ struct list_head dn_list;
+ int dn_nodeid;
+};
+
static int dlmcontrol_ci; /* Client number in the main loop */
static int dlmcontrol_fd = -1; /* fd for dlm_controld connection */
@@ -61,6 +68,7 @@ int dlmcontrol_register(const char *name,
}
snprintf(df->df_name, DLM_LOCKSPACE_LEN+1, "%s", name);
+ INIT_LIST_HEAD(&df->df_notifications);
df->df_result = result_func;
df->df_user_data = user_data;
@@ -101,6 +109,47 @@ static struct dlmcontrol_fs *find_fs(const char *name)
return NULL;
}
+static struct dlmcontrol_notification *find_notification(struct dlmcontrol_fs *df,
+ int nodeid)
+{
+ struct list_head *pos;
+ struct dlmcontrol_notification *dn;
+
+ list_for_each(pos, &df->df_notifications) {
+ dn = list_entry(pos, struct dlmcontrol_notification,
+ dn_list);
+ if (dn->dn_nodeid == nodeid)
+ return dn;
+ }
+
+ return NULL;
+}
+
+static void complete_notification(struct dlmcontrol_fs *df, int nodeid)
+{
+ struct dlmcontrol_notification *dn;
+
+ dn = find_notification(df, nodeid);
+ if (dn) {
+ log_debug("Completing notification on \"%s\" for node %d\n",
+ df->df_name, nodeid);
+ list_del(&dn->dn_list);
+ free(dn);
+ }
+}
+
+static void complete_all_notifications(struct dlmcontrol_fs *df)
+{
+ struct list_head *pos, *n;
+ struct dlmcontrol_notification *dn;
+
+ list_for_each_safe(pos, n, &df->df_notifications) {
+ dn = list_entry(pos, struct dlmcontrol_notification,
+ dn_list);
+ complete_notification(df, dn->dn_nodeid);
+ }
+}
+
int dlmcontrol_unregister(const char *name)
{
int rc;
@@ -120,6 +169,8 @@ int dlmcontrol_unregister(const char *name)
* the face of errors.
*/
log_debug("Unregistering \"%s\" from dlm_controld", name);
+
+ complete_all_notifications(df);
rc = dlmc_fs_unregister(dlmcontrol_fd, df->df_name);
if (rc) {
rc = -errno;
@@ -146,6 +197,72 @@ static void dlmcontrol_unregister_all(void)
}
}
+/*
+ * This is a fire and forget function. It will queue the notification and
+ * send it to dlm_controld. If dlm_controld responds in the negative, this
+ * function is called again to send. This goes on until dlm_controld sees
+ * the node go down.
+ *
+ * We don't sleep because the process switching should be good enough.
+ *
+ * If there is any error, malloc/network/whatever, we kill the daemon. We
+ * can't continue safely if we're not interacting with dlm_controld.
+ */
+void dlmcontrol_node_down(const char *name, int nodeid)
+{
+ int rc;
+ struct dlmcontrol_fs *df;
+ struct dlmcontrol_notification *dn;
+
+ df = find_fs(name);
+ if (!df) {
+ log_error("Name \"%s\" is unknown", name);
+ return;
+ }
+
+ dn = find_notification(df, nodeid);
+ if (!dn) {
+ dn = malloc(sizeof(struct dlmcontrol_notification));
+ if (!dn) {
+ log_error("Unable to allocate memory");
+ goto fail;
+ }
+ dn->dn_nodeid = nodeid;
+ list_add(&dn->dn_list, &df->df_notifications);
+ }
+
+ log_debug("Sending notification of node %d for \"%s\"",
+ dn->dn_nodeid, df->df_name);
+ rc = dlmc_fs_notified(dlmcontrol_fd, df->df_name, dn->dn_nodeid);
+ if (rc) {
+ log_error("Unable to send notification for node %d on "
+ "\"%s\": %s",
+ dn->dn_nodeid, df->df_name, strerror(errno));
+ goto fail;
+ }
+
+ return;
+
+fail:
+ shutdown_daemon();
+}
+
+static void notify_result(struct dlmcontrol_fs *df, int nodeid, int status)
+{
+ if (!find_notification(df, nodeid)) {
+ log_error("Notified for nodeid %d on \"%s\", but we never asked for it!",
+ nodeid, df->df_name);
+ return;
+ }
+
+ if (!status) {
+ complete_notification(df, nodeid);
+ return;
+ }
+
+ dlmcontrol_node_down(df->df_name, nodeid);
+}
+
static void dead_dlmcontrol(int ci)
{
if (ci != dlmcontrol_ci) {
@@ -195,8 +312,9 @@ static void process_dlmcontrol(int ci)
break;
case DLMC_RESULT_NOTIFIED:
- log_debug("Notified for \"%s\"", name);
- /* XXX: handle */
+ log_debug("Notified for \"%s\", node %d, status %d",
+ name, nodeid, status);
+ notify_result(df, nodeid, status);
break;
default:
diff --git a/ocfs2_controld/mount.c b/ocfs2_controld/mount.c
index c4ce5b4..3accd28 100644
--- a/ocfs2_controld/mount.c
+++ b/ocfs2_controld/mount.c
@@ -422,6 +422,8 @@ static void mount_node_down(int nodeid, void *user_data)
if (err)
log_debug("%s while trying to send DOWN message",
error_message(err));
+
+ dlmcontrol_node_down(mg->mg_uuid, nodeid);
}
static void force_node_down(int nodeid, void *user_data)
diff --git a/ocfs2_controld/ocfs2_controld.h b/ocfs2_controld/ocfs2_controld.h
index 4296639..8243513 100644
--- a/ocfs2_controld/ocfs2_controld.h
+++ b/ocfs2_controld/ocfs2_controld.h
@@ -104,6 +104,7 @@ int dlmcontrol_register(const char *name,
void (*result_func)(int status, void *user_data),
void *user_data);
int dlmcontrol_unregister(const char *name);
+void dlmcontrol_node_down(const char *name, int nodeid);
/* mount.c */
void init_mounts(void);
--
1.5.6.3
More information about the Ocfs2-tools-devel
mailing list