[Ocfs2-commits] mfasheh commits r1997 - branches/usysfsify/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Mar 17 16:22:08 CST 2005


Author: mfasheh
Date: 2005-03-17 16:22:07 -0600 (Thu, 17 Mar 2005)
New Revision: 1997

Modified:
   branches/usysfsify/fs/ocfs2/dlm/dlmast.c
   branches/usysfsify/fs/ocfs2/dlm/dlmconvert.c
   branches/usysfsify/fs/ocfs2/dlm/dlmlock.c
   branches/usysfsify/fs/ocfs2/dlm/dlmmaster.c
   branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
   branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
   branches/usysfsify/fs/ocfs2/dlm/dlmrecovery.c
   branches/usysfsify/fs/ocfs2/dlm/dlmunlock.c
Log:
* Merged 1992 from trunk
        - [1992] teach the dlm how to safely leave a domain.



Modified: branches/usysfsify/fs/ocfs2/dlm/dlmast.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmast.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmast.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -195,6 +195,8 @@
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;
 
+	DLM_ASSERT(dlm_domain_fully_joined(dlm));
+
 	dlm_proxy_ast_to_host(past);
 	name = past->name;
 	locklen = past->namelen;

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmconvert.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmconvert.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmconvert.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -372,6 +372,8 @@
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;
 
+	DLM_ASSERT(dlm_domain_fully_joined(dlm));
+
 	dlm_convert_lock_to_host(cnv);
 
 	if (cnv->namelen > DLM_LOCKID_NAME_MAX) {

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmlock.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmlock.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmlock.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -267,6 +267,8 @@
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;
 
+	DLM_ASSERT(dlm_domain_fully_joined(dlm));
+
 	dlm_create_lock_to_host(create);
 	name = create->name;
 	namelen = create->namelen;

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmaster.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmaster.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -885,6 +885,11 @@
 	if (!dlm_grab(dlm))
 		return DLM_MASTER_RESP_NO;
 
+	if (!dlm_domain_fully_joined(dlm)) {
+		response = DLM_MASTER_RESP_NO;
+		goto send_response;
+	}
+
 	dlm_master_request_to_host(request);
 	name = request->name;
 	namelen = request->namelen;

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -74,7 +74,10 @@
 static int dlm_query_join_handler(net_msg *msg, u32 len, void *data);
 static int dlm_assert_joined_handler(net_msg *msg, u32 len, void *data);
 static int dlm_cancel_join_handler(net_msg *msg, u32 len, void *data);
+static int dlm_exit_domain_handler(net_msg *msg, u32 len, void *data);
 
+static void __dlm_print_nodes(dlm_ctxt *dlm);
+
 LIST_HEAD(dlm_domains);
 spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED;
 DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
@@ -668,27 +671,20 @@
 	spin_unlock(&dlm_domain_lock);
 }
 
-static void dlm_leave_domain(dlm_ctxt *dlm)
+int dlm_domain_fully_joined(dlm_ctxt *dlm)
 {
+	int ret;
+
 	spin_lock(&dlm_domain_lock);
-
-	BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED);
-	BUG_ON(!dlm->num_joins);
-
-	dlm->num_joins--;
-	if (dlm->num_joins) {
-		spin_unlock(&dlm_domain_lock);
-		return;
-	}
-
-	dlmprintk("shutting down domain %s\n", dlm->name);
-
-	dlm->dlm_state = DLM_CTXT_LEAVING;
+	ret = (dlm->dlm_state == DLM_CTXT_JOINED) ||
+		(dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN);
 	spin_unlock(&dlm_domain_lock);
 
-	/* TODO: Any network communication involving shutting this guy
-	 * down happens here. */
+	return ret;
+}
 
+static void dlm_complete_dlm_shutdown(dlm_ctxt *dlm)
+{
 	hb_unregister_callback(&dlm->dlm_hb_up);
 	hb_unregister_callback(&dlm->dlm_hb_down);
 
@@ -707,11 +703,177 @@
 	wake_up(&dlm_domain_events);
 }
 
+static void dlm_migrate_all_locks(dlm_ctxt *dlm)
+{
+	dlmprintk("Migrating locks from domain %s\n", dlm->name);
+	/* TODO: Migrate locks here. For now this is a stub that only logs. */
+}
+
+/* Returns nonzero when no node is currently joining this domain. */
+static int dlm_no_joining_node(dlm_ctxt *dlm)
+{
+	int idle;
+
+	spin_lock(&dlm->spinlock);
+	idle = (dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN);
+	spin_unlock(&dlm->spinlock);
+	return idle;
+}
+
+static void dlm_mark_domain_leaving(dlm_ctxt *dlm)
+{
+	/* Two spinlocks are needed: dlm_domain_lock guards the dlm state
+	 * and dlm->spinlock guards the join state. Both are taken on
+	 * every pass and both are dropped again before sleeping. */
+	for (;;) {
+		spin_lock(&dlm_domain_lock);
+		spin_lock(&dlm->spinlock);
+		if (dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN)
+			break;
+		dlmprintk("Node %d is joining, we wait on it.\n",
+			  dlm->joining_node);
+		spin_unlock(&dlm->spinlock);
+		spin_unlock(&dlm_domain_lock);
+		wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm));
+	}
+
+	/* No node is joining and both locks are still held. */
+	dlm->dlm_state = DLM_CTXT_LEAVING;
+	spin_unlock(&dlm->spinlock);
+	spin_unlock(&dlm_domain_lock);
+}
+
+/* Network handler for DLM_EXIT_DOMAIN_MSG: drop the node named in the
+ * message from our domain map. Always returns 0. */
+static int dlm_exit_domain_handler(net_msg *msg, u32 len, void *data)
+{
+	dlm_exit_domain *leave_msg = (dlm_exit_domain *) msg->buf;
+	dlm_ctxt *dlm = data;
+	unsigned int leaving;
+
+	dlmprintk0("\n");
+
+	if (!dlm_grab(dlm))
+		return 0;
+
+	dlm_exit_domain_to_host(leave_msg);
+	leaving = leave_msg->node_idx;
+
+	dlmprintk("Node %u leaves domain %s\n", leaving, dlm->name);
+
+	/* The map update and the node dump both run under dlm->spinlock. */
+	spin_lock(&dlm->spinlock);
+	clear_bit(leaving, dlm->domain_map);
+	__dlm_print_nodes(dlm);
+	spin_unlock(&dlm->spinlock);
+	dlm_put(dlm);
+	return 0;
+}
+
+/* Send one DLM_EXIT_DOMAIN_MSG carrying our node number to 'node'.
+ * Returns whatever net_send_message() returned. */
+static int dlm_send_one_domain_exit(dlm_ctxt *dlm,
+				    unsigned int node)
+{
+	dlm_exit_domain exit_msg;
+	int ret;
+
+	dlmprintk("Asking node %u if we can leave the domain %s me = %u\n",
+		  node, dlm->name, dlm->node_num);
+
+	/* Zero the whole message so the pad bytes are defined. */
+	memset(&exit_msg, 0, sizeof(exit_msg));
+	exit_msg.node_idx = dlm->node_num;
+	dlm_exit_domin_to_net(&exit_msg);
+
+	ret = net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, &exit_msg,
+			       sizeof(exit_msg), node, NULL);
+	dlmprintk("status return %d from net_send_message\n", ret);
+
+	return ret;
+}
+
+/* Tell every node left in our domain map that we are leaving the domain.
+ * Takes and releases dlm->spinlock; the lock is dropped around each send. */
+static void dlm_leave_domain(dlm_ctxt *dlm)
+{
+	int node, clear_node, status;
+
+	/* At this point we've migrated away all our locks and won't
+	 * accept mastership of new ones. The dlm is responsible for
+	 * almost nothing now. We make sure not to confuse any joining
+	 * nodes and then commence shutdown procedure. */
+
+	spin_lock(&dlm->spinlock);
+	/* Clear ourselves from the domain map */
+	clear_bit(dlm->node_num, dlm->domain_map);
+	/* find_next_bit() returns its size argument, never -1, when no bit
+	 * is set, so compare against NM_MAX_NODES to terminate the loop. */
+	while ((node = find_next_bit(dlm->domain_map, NM_MAX_NODES, 0))
+	       < NM_MAX_NODES) {
+		/* Drop the dlm spinlock. This is safe wrt the domain_map.
+		 * -nodes cannot be added now as the
+		 *   query_join_handlers knows to respond with OK_NO_MAP
+		 * -we catch the right network errors if a node is
+		 *   removed from the map while we're sending him the
+		 *   exit message. */
+		spin_unlock(&dlm->spinlock);
+		clear_node = 1;
+
+		status = dlm_send_one_domain_exit(dlm, node);
+		if (status < 0 &&
+		    status != -ENOPROTOOPT &&
+		    status != -ENOTCONN) {
+			printk("dlm_leave_domain: Error %d sending "
+			       "domain exit message to node %d\n", status,
+			       node);
+
+			/* Not sure what to do here; yield the CPU in case
+			 * this was a transient error. The node's bit stays
+			 * set, so the send is retried on a later pass. */
+			schedule();
+			clear_node = 0;
+		}
+
+		spin_lock(&dlm->spinlock);
+		/* If we're not clearing the node bit then we intend
+		 * to loop back around to try again. */
+		if (clear_node)
+			clear_bit(node, dlm->domain_map);
+	}
+	spin_unlock(&dlm->spinlock);
+}
+
 void dlm_unregister_domain(dlm_ctxt *dlm)
 {
+	int leave = 0;
+
 	BUG_ON(!dlm);
 
-	dlm_leave_domain(dlm);
+	spin_lock(&dlm_domain_lock);
+	BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED);
+	BUG_ON(!dlm->num_joins);
+
+	dlm->num_joins--;
+	if (!dlm->num_joins) {
+		/* We mark it "in shutdown" now so new register
+		 * requests wait until we've completely left the
+		 * domain. Don't use DLM_CTXT_LEAVING yet as we still
+		 * want new domain joins to communicate with us at
+		 * least until we've completed migration of our
+		 * resources. */
+		dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
+		leave = 1;
+	}
+	spin_unlock(&dlm_domain_lock);
+
+	if (leave) {
+		dlmprintk("shutting down domain %s\n", dlm->name);
+		dlm_migrate_all_locks(dlm);
+		dlm_mark_domain_leaving(dlm);
+		dlm_leave_domain(dlm);
+		dlm_complete_dlm_shutdown(dlm);
+	}
 	dlm_put(dlm);
 }
 EXPORT_SYMBOL(dlm_unregister_domain);
@@ -749,7 +911,9 @@
 
 	spin_lock(&dlm_domain_lock);
 	dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
-	if (dlm) {
+	/* Once the dlm ctxt is marked as leaving then we don't want
+	 * to be put in someone's domain map. */
+	if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
 		spin_lock(&dlm->spinlock);
 
 		if (dlm->dlm_state == DLM_CTXT_NEW &&
@@ -767,7 +931,7 @@
 			 * and indicate to him that needs to be fixed
 			 * up. */
 			response = JOIN_OK;
-			dlm->joining_node = query->node_idx;
+			__dlm_set_joining_node(dlm, query->node_idx);
 		}
 
 		spin_unlock(&dlm->spinlock);
@@ -801,7 +965,7 @@
 		 * leftover join state. */
 		BUG_ON(dlm->joining_node != assert->node_idx);
 		set_bit(assert->node_idx, dlm->domain_map);
-		dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
+		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
 
 		__dlm_print_nodes(dlm);
 
@@ -832,7 +996,7 @@
 		/* Yikes, this guy wants to cancel his join. No
 		 * problem, we simply cleanup our join state. */
 		BUG_ON(dlm->joining_node != cancel->node_idx);
-		dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
+		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
 
 		spin_unlock(&dlm->spinlock);
 	}
@@ -852,6 +1016,8 @@
 	cancel_msg.name_len = strlen(dlm->name);
 	strncpy(cancel_msg.domain, dlm->name, cancel_msg.name_len);
 
+	dlm_cancel_join_to_net(&cancel_msg);
+
 	status = net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
 				  &cancel_msg, sizeof(cancel_msg), node,
 				  NULL);
@@ -1058,7 +1224,7 @@
 	spin_lock(&dlm->spinlock);
 	memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map));
 
-	dlm->joining_node = dlm->node_num;
+	__dlm_set_joining_node(dlm, dlm->node_num);
 
 	spin_unlock(&dlm->spinlock);
 
@@ -1103,7 +1269,7 @@
 	dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
 
 	spin_lock(&dlm->spinlock);
-	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
+	__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
 	__dlm_print_nodes(dlm);
 	spin_unlock(&dlm->spinlock);
 
@@ -1184,6 +1350,14 @@
 				      DLM_PROXY_AST_MAX_LEN,
 				      dlm_proxy_ast_handler,
 				      dlm);
+	if (status)
+		goto bail;
+
+	status = net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, 0,
+				      sizeof(dlm_exit_domain),
+				      dlm_exit_domain_handler,
+				      dlm);
+
 bail:
 	return status;
 }
@@ -1297,6 +1471,7 @@
 	init_rwsem(&dlm->recovery_sem);
 
 	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
+	init_waitqueue_head(&dlm->dlm_join_events);
 
 	dlm->reco.new_master = NM_INVALID_SLOT_NUM;
 	dlm->reco.dead_node = NM_INVALID_SLOT_NUM;

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.h	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.h	2005-03-17 22:22:07 UTC (rev 1997)
@@ -196,8 +196,9 @@
 
 typedef enum _dlm_ctxt_state {
 	DLM_CTXT_NEW = 0,
+	DLM_CTXT_JOINED,
+	DLM_CTXT_IN_SHUTDOWN,
 	DLM_CTXT_LEAVING,
-	DLM_CTXT_JOINED
 } dlm_ctxt_state;
 
 struct _dlm_ctxt
@@ -215,6 +216,7 @@
 	u8 node_num;
 	u32 key;
 	u8  joining_node;
+	wait_queue_head_t dlm_join_events;
 	unsigned long live_nodes_map[BITS_TO_LONGS(NM_MAX_NODES)];
 	unsigned long domain_map[BITS_TO_LONGS(NM_MAX_NODES)];
 	unsigned long recovery_map[BITS_TO_LONGS(NM_MAX_NODES)];
@@ -228,16 +230,26 @@
 	atomic_t remote_resources;
 	atomic_t unknown_resources;
 
-	/* dlm_refs and dlm_state are protected by dlm_domain_lock */
+	/* NOTE: Next three are protected by dlm_domain_lock */
 	struct kref dlm_refs;
 	dlm_ctxt_state dlm_state;
 	unsigned int num_joins;
+
 	struct hb_callback_func dlm_hb_up;
 	struct hb_callback_func dlm_hb_down;
 	struct task_struct *dlm_thread_task;
 	wait_queue_head_t dlm_thread_wq;
 };
 
+static inline void __dlm_set_joining_node(struct _dlm_ctxt *dlm,
+					  u8 node)
+{
+	assert_spin_locked(&dlm->spinlock);
+	/* Set under dlm->spinlock, then wake dlm_join_events waiters. */
+	dlm->joining_node = node;
+	wake_up(&dlm->dlm_join_events);
+}
+
 #define DLM_LOCK_RES_UNINITED             0x00000001
 #define DLM_LOCK_RES_RECOVERING           0x00000002
 #define DLM_LOCK_RES_READY                0x00000004
@@ -382,6 +394,7 @@
 #define DLM_QUERY_JOIN_MSG		510
 #define DLM_ASSERT_JOINED_MSG		511
 #define DLM_CANCEL_JOIN_MSG		512
+#define DLM_EXIT_DOMAIN_MSG		513
 
 typedef struct _dlm_reco_node_data
 {
@@ -534,6 +547,12 @@
 	u8 domain[NM_MAX_NAME_LEN];
 } dlm_cancel_join;
 
+typedef struct _dlm_exit_domain
+{
+	u8 node_idx;	/* number of the node that is leaving the domain */
+	u8 pad1[3];	/* padding; the sender zeroes the whole struct */
+} dlm_exit_domain;
+
 static inline void dlm_query_join_request_to_net(dlm_query_join_request *m)
 {
 	/* do nothing */
@@ -558,6 +577,14 @@
 {
 	/* do nothing */
 }
+static inline void dlm_exit_domin_to_net(dlm_exit_domain *m)
+{
+	/* do nothing. NOTE(review): "domin" is misspelled; caller matches. */
+}
+static inline void dlm_exit_domain_to_host(dlm_exit_domain *m)
+{
+	/* do nothing -- presumably because the message has only u8 fields */
+}
 static inline void dlm_master_request_to_net(dlm_master_request *m)
 {
 	/* do nothing */
@@ -695,6 +722,7 @@
 void dlm_get(dlm_ctxt *dlm);
 void dlm_put(dlm_ctxt *dlm);
 dlm_ctxt *dlm_grab(dlm_ctxt *dlm);
+int dlm_domain_fully_joined(dlm_ctxt *dlm);
 
 void dlm_lockres_calc_usage(dlm_ctxt *dlm,
 			    dlm_lock_resource *res);

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmrecovery.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmrecovery.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -129,7 +129,7 @@
 	/* Clean up join state on node death. */
 	if (dlm->joining_node == idx) {
 		dlmprintk("Clearing join state for node %u\n", idx);
-		dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
+		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
 	}
 
 	/* notify any mles attached to the heartbeat events */

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmunlock.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmunlock.c	2005-03-17 21:41:37 UTC (rev 1996)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmunlock.c	2005-03-17 22:22:07 UTC (rev 1997)
@@ -291,6 +291,8 @@
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;
 
+	DLM_ASSERT(dlm_domain_fully_joined(dlm));
+
 	dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
 
 	status = DLM_IVLOCKID;



More information about the Ocfs2-commits mailing list