[Ocfs2-commits] zab commits r2359 - trunk/fs/ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Fri Jun 3 13:29:13 CDT 2005


Author: zab
Signed-off-by: mfasheh
Date: 2005-06-03 13:29:11 -0500 (Fri, 03 Jun 2005)
New Revision: 2359

Modified:
   trunk/fs/ocfs2/aio.c
   trunk/fs/ocfs2/ocfs2.h
   trunk/fs/ocfs2/super.c
Log:
o avoid irq-masking spinlocks when managing iocb->private by moving from
  list_heads under a lock to an atomic_t and some xchg-driven singly linked lists

Signed-off-by: mfasheh


Modified: trunk/fs/ocfs2/aio.c
===================================================================
--- trunk/fs/ocfs2/aio.c	2005-06-03 05:24:44 UTC (rev 2358)
+++ trunk/fs/ocfs2/aio.c	2005-06-03 18:29:11 UTC (rev 2359)
@@ -48,9 +48,8 @@
 
 
 struct ocfs2_kiocb_private {
-	struct list_head	kp_teardown_item;
+	struct ocfs2_kiocb_private *kp_teardown_next;
 	ocfs2_super		*kp_osb;
-	struct list_head	kp_pending_item;
 	unsigned		kp_have_alloc_sem:1,
 				kp_have_write_locks:1;
 	struct inode		*kp_inode;
@@ -63,8 +62,6 @@
 	mlog(0, "okp %p\n", okp);
 
 	BUG_ON(okp->kp_inode == NULL);
-	BUG_ON(!list_empty(&okp->kp_teardown_item));
-	BUG_ON(!list_empty(&okp->kp_pending_item));
 
 	if (okp->kp_info.wl_unlock_ctxt)
 		ocfs2_unlock_buffer_inodes(&okp->kp_ctxt);
@@ -78,41 +75,42 @@
 void okp_teardown_from_list(void *data)
 {
 	ocfs2_super *osb = data;
-	unsigned long flags;
-	struct list_head *pos, *tmp;
-	struct ocfs2_kiocb_private *okp;
-	LIST_HEAD(my_list);
+	struct ocfs2_kiocb_private *okp, *next;
 
-	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
-	list_splice_init(&osb->osb_okp_teardown_list, &my_list);
-	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
+	for (okp = xchg(&osb->osb_okp_teardown_next, NULL); okp != NULL;
+	     okp = next) {
 
-	list_for_each_safe(pos, tmp, &my_list) {
-		okp = list_entry(pos, struct ocfs2_kiocb_private,
-				 kp_teardown_item);
-		list_del_init(&okp->kp_teardown_item);
-
+		next = okp->kp_teardown_next;
 		okp_teardown(okp);
 	}
 }
 
 /*
- * This releases the dlm locks we held across an aio operation.
+ * This releases the dlm locks we held across an aio operation and frees the
+ * space we were tracking them in.
  *
- * While aio operations are in flight they have a vfsmnt reference for the
- * file which prevents unmount.  This dtor gets called *after* that
- * ref is dropped, however, so we have to make sure to account for 
- * pending work we have here in the unmount path.  The race starts when
- * aio does its fputs, before it calls dtor which queues work, so just
- * synchronizing with the work queue could miss that first phase.  So unmount
- * first waits for the list to go empty.  Then it has to wait for keventd
- * to finish the work, so it flushes the work queue which only proceeds
- * after keventd comes out of the func and updates some sequence numbers.
+ * While aio operations are in flight they have a vfsmnt reference for the file
+ * which prevents unmount.  This dtor gets called *after* that ref is dropped,
+ * however, so we have to make sure to account for pending work we have here in
+ * the unmount path.  The race starts when aio does its fputs, before it calls
+ * dtor which queues work, so just synchronizing with the work queue could miss
+ * that first phase.  So unmount first waits for the pending count to drop.
+ * Then it has to wait for keventd to finish the work freeing the okps.
+ *
+ * _dtor can be called from just about any context and lock teardown is
+ * anything but interrupt safe.  We used to hand the okps to
+ * okp_teardown_from_list with a normal list_head and irq masking lock but we
+ * want to avoid masking interrupts so it was shifted to the {cmp,}xchg() and
+ * atomic_t.
+ *
+ * Adding to the singly linked ->next list is only a little tricky.  We have to
+ * watch for races between sampling the head to assign ->next in the inserting
+ * okp and a new head being written before we point the head to the inserting
+ * okp.
  */
 static void ocfs2_ki_dtor(struct kiocb *iocb)
 {
-	struct ocfs2_kiocb_private *okp = iocb->private;
-	unsigned long flags;
+	struct ocfs2_kiocb_private *next, *okp = iocb->private;
 	ocfs2_super *osb = okp->kp_osb;
 
 	mlog(0, "iocb %p okp %p\n", iocb, okp);
@@ -121,46 +119,29 @@
 	 * it was able to alloc the okp and get an inode reference */
 	BUG_ON(okp == NULL);
 	BUG_ON(okp->kp_inode == NULL);
-	BUG_ON(!list_empty(&okp->kp_teardown_item));
-	BUG_ON(list_empty(&okp->kp_pending_item));
 
 	/* we had better not try to work with this iocb again */
 	iocb->private = NULL;
 
-	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
+	 /* once this cmpxchg succeeds the okp can be freed so we have to be
+	  * careful not to deref it when testing success */
+	do {
+		next = osb->osb_okp_teardown_next;
+		okp->kp_teardown_next = next;
+	} while (cmpxchg(&osb->osb_okp_teardown_next, next, okp) != next);
 
-	list_del_init(&okp->kp_pending_item);
-	if (list_empty(&osb->osb_okp_pending_list))
-		wake_up(&osb->osb_okp_pending_wq);
-
-	/* 
-	 * there is very little in the teardown that is interrupt-safe and we
-	 * want to be able to sync with its completion so we unconditionally
-	 * push it to keventd.
-	 */
-	list_add_tail(&okp->kp_teardown_item, &osb->osb_okp_teardown_list);
 	schedule_work(&osb->osb_okp_teardown_work);
 
-	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
+	if (atomic_dec_and_test(&osb->osb_okp_pending))
+		wake_up(&osb->osb_okp_pending_wq);
 }
 
-static int okp_pending_empty(ocfs2_super *osb)
-{
-	unsigned long flags;
-	int empty;
-
-	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
-	empty = list_empty(&osb->osb_okp_pending_list);
-	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
-
-	return empty;
-}
-
-/* see ocfs2_ki_dtor */
+/* see ocfs2_ki_dtor() */
 void ocfs2_wait_for_okp_destruction(ocfs2_super *osb)
 {
 	/* first wait for okps to enter the work queue */
-	wait_event(osb->osb_okp_pending_wq, okp_pending_empty(osb));
+	wait_event(osb->osb_okp_pending_wq,
+		   atomic_read(&osb->osb_okp_pending) == 0);
 	/* 
 	 * then wait for keventd to finish with all its work, including ours.
 	 * 
@@ -185,7 +166,6 @@
 {
 	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
 	struct ocfs2_kiocb_private *okp;
-	unsigned long flags;
 	ocfs2_super *osb;
 
 	okp = kcalloc(1, sizeof(*okp), GFP_KERNEL);
@@ -211,10 +191,7 @@
 	iocb->ki_cancel = ocfs2_ki_cancel;
 	INIT_BUFFER_LOCK_CTXT(&okp->kp_ctxt);
 
-	INIT_LIST_HEAD(&okp->kp_teardown_item);
-	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
-	list_add_tail(&okp->kp_pending_item, &osb->osb_okp_pending_list);
-	spin_unlock_irqrestore(&okp->kp_osb->osb_okp_teardown_lock, flags);
+	atomic_inc(&osb->osb_okp_pending);
 out:
 	mlog(0, "iocb %p returning %p\n", iocb, okp);
 	return okp;

Modified: trunk/fs/ocfs2/ocfs2.h
===================================================================
--- trunk/fs/ocfs2/ocfs2.h	2005-06-03 05:24:44 UTC (rev 2358)
+++ trunk/fs/ocfs2/ocfs2.h	2005-06-03 18:29:11 UTC (rev 2359)
@@ -256,12 +256,10 @@
 
 	struct list_head	osb_net_handlers;
 
-	/* see ocfs2_ki_dtor.  _lock is grabbed in interrupt context in
-	 * _dtor, maybe, so be sure to use _irqsave */
-	spinlock_t			osb_okp_teardown_lock;
+	/* see ocfs2_ki_dtor() */
 	struct work_struct		osb_okp_teardown_work;
-	struct list_head		osb_okp_teardown_list;
-	struct list_head		osb_okp_pending_list;
+	struct ocfs2_kiocb_private	*osb_okp_teardown_next;
+	atomic_t			osb_okp_pending;
 	wait_queue_head_t		osb_okp_pending_wq;
 
 	wait_queue_head_t		osb_mount_event;

Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c	2005-06-03 05:24:44 UTC (rev 2358)
+++ trunk/fs/ocfs2/super.c	2005-06-03 18:29:11 UTC (rev 2359)
@@ -1065,9 +1065,8 @@
 	INIT_LIST_HEAD(&osb->vote_list);
 	spin_lock_init(&osb->s_next_gen_lock);
 
-	spin_lock_init(&osb->osb_okp_teardown_lock);
-	INIT_LIST_HEAD(&osb->osb_okp_teardown_list);
-	INIT_LIST_HEAD(&osb->osb_okp_pending_list);
+	osb->osb_okp_teardown_next = NULL;
+	atomic_set(&osb->osb_okp_pending, 0);
 	init_waitqueue_head(&osb->osb_okp_pending_wq);
 	/* we sync with this work queue (and sb ref) on unmount */
 	INIT_WORK(&osb->osb_okp_teardown_work, okp_teardown_from_list, osb);



More information about the Ocfs2-commits mailing list