[Ocfs2-commits] zab commits r2046 - trunk/fs/ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Mar 24 16:00:18 CST 2005


Author: zab
Signed-off-by: mfasheh
Date: 2005-03-24 16:00:16 -0600 (Thu, 24 Mar 2005)
New Revision: 2046

Modified:
   trunk/fs/ocfs2/aio.c
   trunk/fs/ocfs2/aio.h
   trunk/fs/ocfs2/ocfs.h
   trunk/fs/ocfs2/super.c
Log:
o have unmount sync with kiocb private destruction so we don't potentially
  have bits hanging around after unmount.

Signed-off-by: mfasheh


Modified: trunk/fs/ocfs2/aio.c
===================================================================
--- trunk/fs/ocfs2/aio.c	2005-03-24 20:11:10 UTC (rev 2045)
+++ trunk/fs/ocfs2/aio.c	2005-03-24 22:00:16 UTC (rev 2046)
@@ -50,6 +50,8 @@
 
 struct ocfs2_kiocb_private {
 	struct list_head	kp_teardown_item;
+	ocfs_super		*kp_osb;
+	struct list_head	kp_pending_item;
 	unsigned		kp_have_alloc_sem:1,
 				kp_have_write_locks:1;
 	struct inode		*kp_inode;
@@ -57,11 +59,6 @@
 	struct ocfs2_write_lock_info kp_info;
 };
 
-static void okp_teardown_from_list(void *unused);
-static DECLARE_WORK(okp_teardown_work, okp_teardown_from_list, NULL);
-static LIST_HEAD(okp_teardown_list);
-static spinlock_t okp_teardown_lock = SPIN_LOCK_UNLOCKED;
-
 static struct ocfs2_kiocb_private *okp_from_iocb(struct kiocb *iocb)
 {
 	struct ocfs2_kiocb_private *okp;
@@ -103,16 +100,17 @@
 	kfree(okp);
 }
 
-static void okp_teardown_from_list(void *unused)
+void okp_teardown_from_list(void *data)
 {
+	ocfs_super *osb = data;
 	unsigned long flags;
 	struct list_head *pos, *tmp;
 	struct ocfs2_kiocb_private *okp;
 	LIST_HEAD(my_list);
 
-	spin_lock_irqsave(&okp_teardown_lock, flags);
-	list_splice_init(&okp_teardown_list, &my_list);
-	spin_unlock_irqrestore(&okp_teardown_lock, flags);
+	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
+	list_splice_init(&osb->osb_okp_teardown_list, &my_list);
+	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
 
 	list_for_each_safe(pos, tmp, &my_list) {
 		okp = list_entry(pos, struct ocfs2_kiocb_private,
@@ -123,18 +121,34 @@
 	}
 }
 
+/*
+ * This releases the dlm locks we held across an aio operation.
+ *
+ * While aio operations are in flight they have a vfsmnt reference for the
+ * file which prevents unmount.  This dtor gets called *after* that
+ * ref is dropped, however, so we have to make sure to account for 
+ * pending work we have here in the unmount path.  The race starts when
+ * aio does its fputs, before it calls dtor which queues work, so just
+ * synchronizing with the work queue could miss that first phase.  So unmount
+ * first waits for the list to go empty.  Then it has to wait for keventd
+ * to finish the work, so it flushes the work queue which only proceeds
+ * after keventd comes out of the func and updates some sequence numbers.
+ */
 static void ocfs2_ki_dtor(struct kiocb *iocb)
 {
 	struct ocfs2_kiocb_private *okp;
+	ocfs_super *osb;
 	unsigned long flags;
 
 	okp = okp_from_iocb(iocb);
 
 	/* okp_alloc only assigns the iocb->private and ->ki_dtor pointers if
 	 * it was able to alloc the okp and get an inode reference */
 	BUG_ON(okp == NULL);
+	osb = okp->kp_osb;
 	BUG_ON(okp->kp_inode == NULL);
 	BUG_ON(!list_empty(&okp->kp_teardown_item));
+	BUG_ON(list_empty(&okp->kp_pending_item));
 
 	/* we had better not try to work with this iocb again */
 	okp_to_iocb(iocb, NULL);
@@ -144,18 +158,52 @@
 		 * there is very little in the teardown that is interrupt-safe,
 		 * push it to keventd
 		 */
-		spin_lock_irqsave(&okp_teardown_lock, flags);
-		list_add_tail(&okp->kp_teardown_item, &okp_teardown_list);
-		schedule_work(&okp_teardown_work);
-		spin_unlock_irqrestore(&okp_teardown_lock, flags);
+		spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
+		list_add_tail(&okp->kp_teardown_item,
+			      &osb->osb_okp_teardown_list);
+		list_del_init(&okp->kp_pending_item);
+		if (list_empty(&osb->osb_okp_pending_list))
+			wake_up(&osb->osb_okp_pending_wq);
+		schedule_work(&osb->osb_okp_teardown_work);
+		spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
 	} else
 		okp_teardown(okp);
 }
 
+static int okp_pending_empty(ocfs_super *osb)
+{
+	unsigned long flags;
+	int empty;
+
+	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
+	empty = list_empty(&osb->osb_okp_pending_list);
+	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
+
+	return empty;
+}
+
+/* see ocfs2_ki_dtor */
+void ocfs2_wait_for_okp_destruction(ocfs_super *osb)
+{
+	/* first wait for okps to enter the work queue */
+	wait_event(osb->osb_okp_pending_wq, okp_pending_empty(osb));
+	/* 
+	 * then wait for keventd to finish with all its work, including ours.
+	 * 
+	 * XXX this makes me very nervous.  what if our work blocks keventd
+	 * during an unlock and the unlock can only proceed if keventd
+	 * can get to some more work that the dlm might have queued?  
+	 * do we push any dlm work to keventd?
+	 */
+	flush_scheduled_work();
+}
+
 static struct ocfs2_kiocb_private *okp_alloc(struct kiocb *iocb)
 {
 	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
 	struct ocfs2_kiocb_private *okp;
+	unsigned long flags;
+	ocfs_super *osb;
 
 	okp = kcalloc(1, sizeof(*okp), GFP_KERNEL);
 	if (okp == NULL) {
@@ -171,6 +220,9 @@
 		okp = ERR_PTR(-EINVAL);
 		goto out;
 	}
+	/* unmount syncs with work using this ref before destroying the osb */
+	osb = OCFS2_SB(inode->i_sb);
+	okp->kp_osb = osb;
 
 	okp_to_iocb(iocb, okp);
 #ifndef KIOCB_DTOR_IN_PRIVATE
@@ -179,7 +231,11 @@
 	iocb->ki_dip.ki_dtor = ocfs2_ki_dtor;
 #endif
 	INIT_BUFFER_LOCK_CTXT(&okp->kp_ctxt);
+
 	INIT_LIST_HEAD(&okp->kp_teardown_item);
+	spin_lock_irqsave(&osb->osb_okp_teardown_lock, flags);
+	list_add_tail(&okp->kp_pending_item, &osb->osb_okp_pending_list);
+	spin_unlock_irqrestore(&osb->osb_okp_teardown_lock, flags);
 out:
 	return okp;
 }

Modified: trunk/fs/ocfs2/aio.h
===================================================================
--- trunk/fs/ocfs2/aio.h	2005-03-24 20:11:10 UTC (rev 2045)
+++ trunk/fs/ocfs2/aio.h	2005-03-24 22:00:16 UTC (rev 2046)
@@ -33,4 +33,7 @@
 			    loff_t pos);
 #endif
 
+void okp_teardown_from_list(void *data);
+void ocfs2_wait_for_okp_destruction(ocfs_super *osb);
+
 #endif /* OCFS2_AIO_H */

Modified: trunk/fs/ocfs2/ocfs.h
===================================================================
--- trunk/fs/ocfs2/ocfs.h	2005-03-24 20:11:10 UTC (rev 2045)
+++ trunk/fs/ocfs2/ocfs.h	2005-03-24 22:00:16 UTC (rev 2046)
@@ -398,6 +398,14 @@
 	struct hb_callback_func	osb_hb_down;
 
 	struct list_head	osb_net_handlers;
+
+	/* see ocfs2_ki_dtor.  _lock is grabbed in interrupt context in
+	 * _dtor, maybe, so be sure to use _irqsave */
+	spinlock_t			osb_okp_teardown_lock;
+	struct work_struct		osb_okp_teardown_work;
+	struct list_head		osb_okp_teardown_list;
+	struct list_head		osb_okp_pending_list;
+	wait_queue_head_t		osb_okp_pending_wq;
 };
 
 typedef struct _ocfs_global_ctxt

Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c	2005-03-24 20:11:10 UTC (rev 2045)
+++ trunk/fs/ocfs2/super.c	2005-03-24 22:00:16 UTC (rev 2046)
@@ -47,6 +47,7 @@
 #include "ocfs_log.h"
 #include "ocfs.h"
 #include "ocfs2.h"
+#include "aio.h"
 
 /* this should be the only file to include a version 1 header */
 #include "ocfs1_fs_compat.h"
@@ -984,6 +985,8 @@
 	if (tmp < 0)
 		LOG_ERROR_STATUS(tmp);
 
+	ocfs2_wait_for_okp_destruction(osb);
+
 	ocfs2_put_slot(osb);
 
 	ocfs_release_system_inodes(osb);
@@ -1103,6 +1106,13 @@
 	spin_lock_init(&osb->s_next_gen_lock);
 	get_random_bytes(&osb->s_next_generation, sizeof(u32));
 
+	spin_lock_init(&osb->osb_okp_teardown_lock);
+	INIT_LIST_HEAD(&osb->osb_okp_teardown_list);
+	INIT_LIST_HEAD(&osb->osb_okp_pending_list);
+	init_waitqueue_head(&osb->osb_okp_pending_wq);
+	/* we sync with this work queue (and sb ref) on unmount */
+	INIT_WORK(&osb->osb_okp_teardown_work, okp_teardown_from_list, osb);
+
 	/* FIXME
 	 * This should be done in ocfs_journal_init(), but unknown
 	 * ordering issues will cause the filesystem to crash.



More information about the Ocfs2-commits mailing list