[Ocfs2-tools-devel] [PATCH 37/39] ocfs2_controld: Open and close
checkpoints
Joel Becker
joel.becker at oracle.com
Fri Mar 14 16:53:00 PDT 2008
Open and close checkpoint handles.
Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
ocfs2_controld/ckpt.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 265 insertions(+), 12 deletions(-)
diff --git a/ocfs2_controld/ckpt.c b/ocfs2_controld/ckpt.c
index a86fe62..8eb881b 100644
--- a/ocfs2_controld/ckpt.c
+++ b/ocfs2_controld/ckpt.c
@@ -28,11 +28,21 @@
#include "ocfs2_controld.h"
-/* Ought to be enough for most things */
-#define RETRY_TRIES 5
+/*
+ * A tentative retry is something we don't want to spend a lot of time on;
+ * it works or we error. A serious retry we really want to complete.
+ */
+#define TENTATIVE_RETRY_TRIES 2
+#define SERIOUS_RETRY_TRIES 5
+
+/*
+ * One open checkpoint: the name it was opened under together with the
+ * handle the checkpoint service handed back for it.
+ */
+struct ckpt_handle {
+	/* Checkpoint name; ckpt_new() stores it with the "ocfs2:" prefix */
+	SaNameT ch_name;
+	/* Filled in by saCkptCheckpointOpen(); zero until then */
+	SaCkptCheckpointHandleT ch_handle;
+};
-static SaCkptHandleT handle;
+static SaCkptHandleT daemon_handle;
+struct ckpt_handle *global_handle;
/* This is the version OpenAIS supports */
static SaVersionT version = { 'B', 1, 1 };
@@ -42,6 +52,28 @@ static SaCkptCallbacksT callbacks = {
NULL,
};
+/*
+ * All of our checkpoints store 4K of data in 32 sections of 128 bytes. We
+ * probably won't actually use more than one section of each checkpoint,
+ * but we spec them larger so that we can use space later compatibly. Note
+ * that data space is only allocated when needed, so if we store one section
+ * of 10 bytes, the checkpoint uses 10 bytes, not 4K.
+ *
+ * Retention time is 0 - when a daemon exits, its checkpoints should disappear.
+ *
+ * Max section ID size is basically big enough to hold a uuid (32
+ * characters) plus something extra. We don't use uuids in section names
+ * yet, but just in case.
+ */
+static SaCkptCheckpointCreationAttributesT ckpt_attributes = {
+ .creationFlags = SA_CKPT_WR_ALL_REPLICAS,
+ .checkpointSize = 4096,
+ .retentionDuration = 0LL,
+ .maxSections = 32,
+ .maxSectionSize = 128,
+ .maxSectionIdSize = 40,
+};
+
static void ais_err_to_errno(SaAisErrorT error, int *rc, char **reason)
{
switch (error) {
@@ -77,12 +109,214 @@ static void ais_err_to_errno(SaAisErrorT error, int *rc, char **reason)
*rc = -EPROTOTYPE;
*reason = "Protocol not compatible";
break;
+ case SA_AIS_ERR_BAD_HANDLE:
+ *rc = -EINVAL;
+ *reason = "Bad Ckpt handle";
+ break;
+ case SA_AIS_ERR_INIT:
+ *rc = -ENODEV;
+ *reason = "Initialization not complete";
+ break;
+ case SA_AIS_ERR_NOT_EXIST:
+ *rc = -ENOENT;
+ *reason = "Object does not exist";
+ break;
+ case SA_AIS_ERR_EXIST:
+ *rc = -EEXIST;
+ *reason = "Object already exists";
+ break;
+ case SA_AIS_ERR_BAD_FLAGS:
+ *rc = -EINVAL;
+ *reason = "Invalid flags";
+ break;
+ case SA_AIS_ERR_ACCESS:
+ *rc = -EACCES;
+ *reason = "Permission denied";
+ break;
default:
*rc = -ENOSYS;
*reason = "Unknown error";
- log_error("Unknown error seen!");
+ log_error("Unknown error seen! (%d)", error);
+ break;
+ }
+}
+
+/*
+ * Open the checkpoint named in handle->ch_name and store the resulting
+ * checkpoint handle in handle->ch_handle.
+ *
+ * Our retention-time scheme of 0 means that we need to create any
+ * checkpoint we want to update.  Nobody is writing to the same checkpoint
+ * at the same time.  A non-zero 'write' therefore asks for write access
+ * plus creation using ckpt_attributes; otherwise the checkpoint is opened
+ * for reading only and no creation attributes are passed.
+ *
+ * -EAGAIN is retried, as is -EEXIST when opening for write, for up to
+ * TENTATIVE_RETRY_TRIES attempts with a one second pause between them.
+ *
+ * Returns 0 on success, otherwise the errno that ais_err_to_errno()
+ * produced for the last failed attempt.
+ */
+static int call_ckpt_open(struct ckpt_handle *handle, int write)
+{
+	int rc, attempt;
+	char *why;
+	SaAisErrorT ais_rc;
+	int open_flags = SA_CKPT_CHECKPOINT_READ;
+	SaCkptCheckpointCreationAttributesT *attrs = NULL;
+
+	if (write) {
+		open_flags |= SA_CKPT_CHECKPOINT_WRITE;
+		open_flags |= SA_CKPT_CHECKPOINT_CREATE;
+		attrs = &ckpt_attributes;
+	}
+
+	for (attempt = 1; attempt <= TENTATIVE_RETRY_TRIES; attempt++) {
+		log_debug("Opening checkpoint \"%.*s\" (try %d)",
+			  handle->ch_name.length, handle->ch_name.value,
+			  attempt);
+		ais_rc = saCkptCheckpointOpen(daemon_handle, &handle->ch_name,
+					      attrs, open_flags, 0,
+					      &handle->ch_handle);
+		ais_err_to_errno(ais_rc, &rc, &why);
+
+		if (rc == 0) {
+			log_debug("Opened checkpoint \"%.*s\" with handle 0x%llx",
+				  handle->ch_name.length,
+				  handle->ch_name.value,
+				  handle->ch_handle);
+			return 0;
+		}
+
+		/* Only "try again", or "exists" while writing, earn a retry */
+		if (!(rc == -EAGAIN || (write && rc == -EEXIST))) {
+			log_error("Unable to open checkpoint \"%.*s\": %s",
+				  handle->ch_name.length,
+				  handle->ch_name.value,
+				  why);
+			return rc;
+		}
+
+		/* Past the check above, -EEXIST can only occur when writing */
+		if (rc == -EEXIST)
+			log_debug("Checkpoint \"%.*s\" exists, retrying after delay",
+				  handle->ch_name.length,
+				  handle->ch_name.value);
+
+		if (attempt == TENTATIVE_RETRY_TRIES)
+			log_error("Unable to open checkpoint \"%.*s\": "
+				  "too many tries",
+				  handle->ch_name.length,
+				  handle->ch_name.value);
+		else
+			sleep(1);
+	}
+
+	return rc;
+}
+
+/*
+ * Close the checkpoint held in handle->ch_handle.
+ *
+ * -EAGAIN is retried for up to TENTATIVE_RETRY_TRIES attempts with a one
+ * second pause between them; any other error stops immediately.  Failures
+ * are only logged - nothing is reported back to the caller.
+ */
+static void call_ckpt_close(struct ckpt_handle *handle)
+{
+	int rc, attempt;
+	char *why;
+	SaAisErrorT ais_rc;
+
+	for (attempt = 1; attempt <= TENTATIVE_RETRY_TRIES; attempt++) {
+		log_debug("Closing checkpoint \"%.*s\" (try %d)",
+			  handle->ch_name.length, handle->ch_name.value,
+			  attempt);
+		ais_rc = saCkptCheckpointClose(handle->ch_handle);
+		ais_err_to_errno(ais_rc, &rc, &why);
+
+		if (rc == 0) {
+			log_debug("Closed checkpoint \"%.*s\"",
+				  handle->ch_name.length,
+				  handle->ch_name.value);
 			break;
+		}
+
+		if (rc != -EAGAIN) {
+			log_error("Unable to close checkpoint \"%.*s\": %s",
+				  handle->ch_name.length,
+				  handle->ch_name.value,
+				  why);
+			break;
+		}
+
+		if (attempt == TENTATIVE_RETRY_TRIES)
+			log_error("Unable to close checkpoint \"%.*s\": "
+				  "too many tries",
+				  handle->ch_name.length,
+				  handle->ch_name.value);
+		else
+			sleep(1);
+	}
+}
+
+/*
+ * We name our checkpoints in one of three ways, all prefixed with 'ocfs2:'.
+ *
+ * The global checkpoint is named 'ocfs2:controld'.
+ * The node info checkpoint is named 'ocfs2:controld:<8-hex-char-nodeid>'
+ * A mount checkpoint is named 'ocfs2:<uuid>:<8-hex-char-nodeid>'
+ */
+#define CKPT_PREFIX "ocfs2:"
+/*
+ * Allocate a checkpoint handle for CKPT_PREFIX + name and open the
+ * checkpoint.
+ *
+ * name:   checkpoint name without the CKPT_PREFIX
+ * write:  passed through unchanged to call_ckpt_open()
+ * handle: on success receives the newly allocated handle, which the
+ *         caller releases with ckpt_free().  Left untouched on failure.
+ *
+ * Returns 0 on success, -EINVAL if the prefixed name does not fit,
+ * -ENOMEM if the handle cannot be allocated, or whatever error
+ * call_ckpt_open() returned.
+ */
+static int ckpt_new(const char *name, int write, struct ckpt_handle **handle)
+{
+	int rc;
+	size_t namelen = strlen(name) + strlen(CKPT_PREFIX);
+	struct ckpt_handle *h;
+
+	/*
+	 * The snprintf() below is bounded by SA_MAX_NAME_LENGTH and always
+	 * NUL-terminates, so it stores at most SA_MAX_NAME_LENGTH - 1
+	 * characters while still returning the untruncated length.  A name
+	 * of exactly SA_MAX_NAME_LENGTH would lose its last character yet
+	 * have ch_name.length claim the full size, so reject it too.
+	 */
+	if (namelen >= SA_MAX_NAME_LENGTH) {
+		log_error("Checkpoint name \"%s\" too long", name);
+		return -EINVAL;
+	}
+
+	/* Zeroed so that ch_handle reads as "not open" until the open succeeds */
+	h = calloc(1, sizeof(*h));
+	if (!h) {
+		log_error("Unable to allocate checkpoint handle");
+		return -ENOMEM;
 	}
+
+	h->ch_name.length = snprintf((char *)(h->ch_name.value),
+				     SA_MAX_NAME_LENGTH, "%s%s",
+				     CKPT_PREFIX, name);
+
+	rc = call_ckpt_open(h, write);
+	if (rc) {
+		free(h);
+		return rc;
+	}
+
+	*handle = h;
+	return 0;
+}
+
+/*
+ * Release a handle obtained from ckpt_new(): close the checkpoint if the
+ * handle holds a non-zero checkpoint handle, then free the memory.  The
+ * pointer must not be used afterwards.
+ */
+static void ckpt_free(struct ckpt_handle *handle)
+{
+	if (handle->ch_handle != 0) {
+		call_ckpt_close(handle);
+	}
+
+	free(handle);
+}
+
+/*
+ * Make sure the global "controld" checkpoint is open, keeping its handle
+ * in global_handle.  If it is already open this returns 0 without
+ * reopening, so 'write' only matters on the call that actually opens it.
+ *
+ * Returns 0 on success or the error from ckpt_new().
+ */
+static int ckpt_open_global(int write)
+{
+	int rc = 0;
+
+	if (!global_handle)
+		rc = ckpt_new("controld", write, &global_handle);
+
+	return rc;
+}
+
+/*
+ * Close and free the global checkpoint handle, if one is open, and reset
+ * global_handle so a later ckpt_open_global() opens it afresh.
+ */
+static void ckpt_close_global(void)
+{
+	if (!global_handle)
+		return;
+
+	ckpt_free(global_handle);
+	global_handle = NULL;
+}
+
+/*
+ * Open the node info checkpoint of the given node without write access.
+ * The checkpoint name is "controld:" followed by the node id as eight
+ * hex digits; ckpt_new() adds the common prefix.
+ *
+ * On success *handle receives the new handle; returns 0 or the error
+ * from ckpt_new().
+ */
+int ckpt_open_node(int nodeid, struct ckpt_handle **handle)
+{
+	char ckpt_name[SA_MAX_NAME_LENGTH];
+
+	snprintf(ckpt_name, sizeof(ckpt_name), "controld:%08x", nodeid);
+
+	return ckpt_new(ckpt_name, 0, handle);
+}
+
+/*
+ * Open the node info checkpoint for our_nodeid with write access (which
+ * also requests creation).  The checkpoint name is "controld:" followed
+ * by our node id as eight hex digits; ckpt_new() adds the common prefix.
+ *
+ * On success *handle receives the new handle; returns 0 or the error
+ * from ckpt_new().
+ */
+int ckpt_open_this_node(struct ckpt_handle **handle)
+{
+	char ckpt_name[SA_MAX_NAME_LENGTH];
+
+	snprintf(ckpt_name, sizeof(ckpt_name), "controld:%08x", our_nodeid);
+
+	return ckpt_new(ckpt_name, 1, handle);
+}
+
+/*
+ * Public counterpart to ckpt_open_node() / ckpt_open_this_node(): closes
+ * the checkpoint and frees the handle by handing it to ckpt_free().  The
+ * handle must not be used after this call.
+ */
+void ckpt_close(struct ckpt_handle *handle)
+{
+	ckpt_free(handle);
 }
int setup_ckpt(void)
@@ -91,21 +325,22 @@ int setup_ckpt(void)
char *reason;
SaAisErrorT error;
- for (retrycount = 0; retrycount < RETRY_TRIES; retrycount++) {
+ for (retrycount = 0; retrycount < SERIOUS_RETRY_TRIES; retrycount++) {
log_debug("Initializing CKPT service (try %d)",
retrycount + 1);
- error = saCkptInitialize(&handle, &callbacks, &version);
+ error = saCkptInitialize(&daemon_handle, &callbacks,
+ &version);
ais_err_to_errno(error, &rc, &reason);
if (!rc) {
log_debug("Connected to CKPT service with handle 0x%llx",
- handle);
+ daemon_handle);
break;
}
if (rc != -EAGAIN) {
log_error("Unable to connect to CKPT: %s", reason);
break;
}
- if ((retrycount + 1) < RETRY_TRIES)
+ if ((retrycount + 1) < SERIOUS_RETRY_TRIES)
sleep(1);
else
log_error("Unable to connect to CKPT: too many tries");
@@ -120,13 +355,13 @@ void exit_ckpt(void)
char *reason;
SaAisErrorT error;
- if (!handle)
+ if (!daemon_handle)
return;
- for (retrycount = 0; retrycount < RETRY_TRIES; retrycount++) {
+ for (retrycount = 0; retrycount < SERIOUS_RETRY_TRIES; retrycount++) {
log_debug("Disconnecting from CKPT service (try %d)",
retrycount + 1);
- error = saCkptFinalize(handle);
+ error = saCkptFinalize(daemon_handle);
ais_err_to_errno(error, &rc, &reason);
if (!rc) {
log_debug("Disconnected from CKPT service");
@@ -137,7 +372,7 @@ void exit_ckpt(void)
reason);
break;
}
- if ((retrycount + 1) < RETRY_TRIES)
+ if ((retrycount + 1) < SERIOUS_RETRY_TRIES)
sleep(1);
else
log_error("Unable to disconnect from CKPT: too many tries");
@@ -163,14 +398,32 @@ void daemon_dump_save(void)
}
}
+int our_nodeid = 2;
int main(int argc, char *argv[])
{
int rc;
+ struct ckpt_handle *h;
rc = setup_ckpt();
if (rc)
goto out;
+ rc = ckpt_open_global(1);
+ if (rc)
+ goto out_exit;
+ ckpt_close_global();
+
+ rc = ckpt_open_this_node(&h);
+ if (rc)
+ goto out_exit;
+ ckpt_close(h);
+
+ rc = ckpt_open_node(4, &h);
+ if (rc)
+ goto out_exit;
+ ckpt_close(h);
+
+out_exit:
exit_ckpt();
out:
--
1.5.3.8
More information about the Ocfs2-tools-devel
mailing list