[Ocfs2-commits] mfasheh commits r1605 - branches/dlm-glue/src

Fri Oct 29 17:05:24 CDT 2004

Author: mfasheh
Date: 2004-10-29 17:05:22 -0500 (Fri, 29 Oct 2004)
New Revision: 1605

Modified:
   branches/dlm-glue/src/dlmglue.c
   branches/dlm-glue/src/inode.c
   branches/dlm-glue/src/journal.c
   branches/dlm-glue/src/ocfs.h
   branches/dlm-glue/src/ocfs_journal.h
   branches/dlm-glue/src/proc.c
   branches/dlm-glue/src/super.c
Log:
* get rid of the unused 'needs_flush' flag
  
* implement transaction ids on locks. The commit thread no longer has to
  walk a long list of locks, as the last transaction is recorded on each
  lock in ocfs_commit_trans.



Modified: branches/dlm-glue/src/dlmglue.c
===================================================================

--- branches/dlm-glue/src/dlmglue.c	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/dlmglue.c	2004-10-29 22:05:22 UTC (rev 1605)
@@ -971,6 +971,11 @@
 
 	LOG_ENTRY();
 
+#ifdef VERBOSE_LOCKING_TRACE
+	printk("Taking %s lock on inode %llu.\n",
+	       ex ? "EXMODE" : "PRMODE", OCFS_I(inode)->ip_blkno);
+#endif
+
 	/* we skip recovery wait on journal inodes as those can be
 	 * locked from ocfs_recover_node. */
 	if (!INODE_JOURNAL(inode)) {
@@ -1257,7 +1262,9 @@
 	OCFS_ASSERT(lock->l_level == LKM_EXMODE || 
 		    lock->l_level == LKM_PRMODE);
 
-	if (!lock->l_ro_holders && !lock->l_ex_holders) {
+	if (!lock->l_ro_holders
+	    && !lock->l_ex_holders
+	    && ocfs_inode_fully_checkpointed(inode)) {
 		new_level = ocfs2_highest_compat_lock_level(lock->l_blocking);
 		if (lock->l_level == LKM_EXMODE) {
 			__ocfs2_stuff_meta_lvb(inode);
@@ -1267,7 +1274,7 @@
 		return __ocfs2_downconvert_lock(lockres, lock, new_level,
 						set_lvb);
 	}
-	if (lock->l_ex_holders)
+	if (!ocfs_inode_fully_checkpointed(inode))
 		ocfs_start_checkpoint(OCFS2_SB(inode->i_sb));
 
 	*requeue++;

Modified: branches/dlm-glue/src/inode.c
===================================================================
--- branches/dlm-glue/src/inode.c	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/inode.c	2004-10-29 22:05:22 UTC (rev 1605)
@@ -314,8 +314,6 @@
 
 	init_rwsem(&i->ip_alloc_sem);
 	init_MUTEX(&(i->ip_io_sem));
-	INIT_LIST_HEAD(&i->ip_pending_locks);
-	INIT_LIST_HEAD(&i->ip_j_inode);
 
 	/* These should be set in read_inode2. */
 	i->ip_clusters = 0;

Modified: branches/dlm-glue/src/journal.c
===================================================================
--- branches/dlm-glue/src/journal.c	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/journal.c	2004-10-29 22:05:22 UTC (rev 1605)
@@ -58,42 +58,34 @@
 static int __ocfs_recovery_thread(void *arg);
 static int ocfs_commit_cache (ocfs_super * osb);
 static int ocfs_wait_on_mount(ocfs_super *osb);
-static void ocfs_handle_move_locks(ocfs_journal *journal, 
-				   ocfs_journal_handle *handle);
+static void ocfs_handle_cleanup_locks(ocfs_journal *journal, 
+				      ocfs_journal_handle *handle,
+				      int set_id);
 static void ocfs_commit_unstarted_handle(ocfs_journal_handle *handle);
 static int ocfs_journal_toggle_dirty(ocfs_super *osb,
 				     int dirty);
 
 /* 
  * ocfs_commit_cache()
- *
- * This is in journal.c for lack of a better place.
- *
  */
 static int ocfs_commit_cache(ocfs_super *osb)
 {
-	int status = 0, tmpstat;
-	unsigned int flushed  = 0;
-	unsigned int cmt_locks;
+	int status = 0;
+	unsigned int flushed;
+	unsigned long old_id;
 	ocfs_journal * journal = NULL;
-	struct list_head *p, *n;
-	ocfs_journal_lock *lock = NULL;
-	struct inode *inode;
-	ocfs_inode_private *ip;
 
 	LOG_ENTRY();
 
 	journal = osb->journal;
 
-	/* Step 1: flush all pending commits and checkpoint the journal. */
+	/* Flush all pending commits and checkpoint the journal. */
 	down_write(&journal->trans_barrier);
 
 	if (atomic_read(&journal->num_trans) == 0) {
+		up_write(&journal->trans_barrier);
 		LOG_TRACE_STR("No transactions for me to flush!");
-		/* now, we may have locks left to drop even though no
-		 * transactions are in the journal. */
-
-		goto drop_locks;
+		goto finally;
 	}
 
 	journal_lock_updates(journal->k_journal);
@@ -105,91 +97,18 @@
 		goto finally;
 	}
 
-	ocfs_inc_trans_id(journal);
+	old_id = ocfs_inc_trans_id(journal);
 
-drop_locks:
 	flushed = atomic_read(&journal->num_trans);
 	atomic_set(&journal->num_trans, 0);
-
-	/* Step 2: Drop any locks acquired during transactions which
-	 * have just been checkpointed.  */
-	spin_lock(&journal->cmt_lock);
-
-	cmt_locks = atomic_read(&journal->num_cmt_locks);
-
-	atomic_add(atomic_read(&journal->num_cmt_locks),
-		   &journal->num_chkpt_locks);
-	atomic_set(&journal->num_cmt_locks, 0);
-
-	/* move the locks off each inode onto the commit threads list. */
-	list_for_each_safe(p, n, &journal->committing_inodes) {
-		ip = list_entry(p, ocfs_inode_private, ip_j_inode);
-		inode = ip->ip_inode;
-
-		if (!list_empty(&OCFS_I(inode)->ip_pending_locks))
-			list_splice_init(&OCFS_I(inode)->ip_pending_locks,
-					(&journal->checkpointing_locks)->prev);
-
-		/* we can now remove the inode from the committing
-		 * list. */
-		list_del_init(&OCFS_I(inode)->ip_j_inode);
-	}
-	osb->needs_flush = 0;
-
-	spin_unlock(&journal->cmt_lock);
-
-	/* TODO: Can we assert this anymore and move these lock
-	 * releases back up?  
-	 *
-	 * Once we've got cmt_lock, we can let
-	 * transactions start again -- it should protect us against
-	 * people mucking with the committed list... */
 	up_write(&journal->trans_barrier);
 
 #ifdef VERBOSE_COMMIT_THREAD
-	if (flushed || cmt_locks)
-		printk("(%u) commit_thread: flushed %u transactions, "
-		       "releasing %u locks\n", current->pid, flushed, 
-		       cmt_locks);
+	printk("(%u) commit_thread: flushed transaction %lu (%u handles)\n",
+	       current->pid, journal->trans_id, flushed);
 #endif
 
-#ifdef VERBOSE_COMMIT_THREAD
-	if (flushed || cmt_locks)
-		printk("(%u) commit_thread: after optimization, %u locks "
-		       "to release\n", current->pid, 
-		       atomic_read(&journal->num_chkpt_locks));
-#endif
-
-	p = n = NULL;
-	list_for_each_safe(p, n, &journal->checkpointing_locks) {
-		if (!atomic_read(&journal->num_chkpt_locks))
-			BUG();
-
-		lock = list_entry(p, ocfs_journal_lock, jl_lock_list);
-
-		list_del(&(lock->jl_lock_list));
-
-		if (!lock->jl_inode)
-			BUG();
-
-#if 0
-		/* enable this for tons of output, which will likely
-		 * hang your box :) */
-		printk("commit_thread: release lock %u (inode %llu)\n", 
-		       atomic_read(&journal->num_chkpt_locks),
-		       OCFS_I(lock->jl_inode)->ip_blkno);
-#endif
-		tmpstat = 0;
-		OCFS_ASSERT(!INODE_DELETED(lock->jl_inode));
-		ocfs2_meta_unlock(lock->jl_inode, 1);
-		iput(lock->jl_inode);
-
-		atomic_dec(&journal->num_chkpt_locks);
-		kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
-	}
-
 	ocfs2_kick_vote_thread(osb);
-
 finally:
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -435,10 +354,7 @@
 	/* You are allowed to add journal locks before the transaction
 	 * has started. */
 	osb = handle->osb;
-	ocfs_handle_move_locks(osb->journal, handle);
-	spin_lock(&osb->journal->cmt_lock);
-	osb->needs_flush = 1;
-	spin_unlock(&osb->journal->cmt_lock);
+	ocfs_handle_cleanup_locks(osb->journal, handle, 0);
 
 	kfree(handle);
 	LOG_EXIT();
@@ -488,10 +404,7 @@
 		BUG();
 	}
 
-	ocfs_handle_move_locks(osb->journal, handle);
-	spin_lock(&osb->journal->cmt_lock);
-	osb->needs_flush = 1;
-	spin_unlock(&osb->journal->cmt_lock);
+	ocfs_handle_cleanup_locks(osb->journal, handle, 1);
 
 	up_read(&journal->trans_barrier);
 
@@ -654,9 +567,6 @@
 
 	list_add_tail(&(lock->jl_lock_list), &(handle->locks));
 	handle->num_locks++;
-	spin_lock(&handle->journal->cmt_lock);
-	atomic_inc(&handle->journal->num_cmt_locks);
-	spin_unlock(&handle->journal->cmt_lock);
 
 	status = 0;
 bail:
@@ -664,19 +574,9 @@
 	return status;
 }
 
-/* move the locks off a journal handle and onto the
- * inode->ip_pending_locks. makes sure the inodes are on
- * journal->committing_inodes so that the commit thread can get them
- * after checkpoint.
- *
- * You want to do this before dropping trans_barrier to prevent the commit
- * thread from missing the locks. 
- * 
- * TODO: When we get rid of the last checkpointed transactions, we can
- * just put locks right on the inode in ocfs_handle_add_lock...
- */
-static void ocfs_handle_move_locks(ocfs_journal *journal, 
-				   ocfs_journal_handle *handle)
+static void ocfs_handle_cleanup_locks(ocfs_journal *journal, 
+				      ocfs_journal_handle *handle,
+				      int set_id)
 {
 	struct list_head *p, *n;
 	ocfs_journal_lock *lock;
@@ -688,18 +588,15 @@
 		handle->num_locks--;
 
 		inode = lock->jl_inode;
-
-		spin_lock(&journal->cmt_lock);
-		/* add the lock to the inode */
-		list_add_tail(&lock->jl_lock_list, 
-			      &OCFS_I(inode)->ip_pending_locks);
-		/* and make sure the inode is on the journals list */
-		if (list_empty(&OCFS_I(inode)->ip_j_inode))
-			list_add_tail(&OCFS_I(inode)->ip_j_inode, 
-				      &journal->committing_inodes);
-		spin_unlock(&journal->cmt_lock);
+		if (set_id)
+			ocfs_set_inode_lock_trans(journal, inode);
+		ocfs2_meta_unlock(inode, 1);
+		if (atomic_read(&inode->i_count) == 1)
+			LOG_ERROR_ARGS("Inode %llu, I'm doing a last iput "
+				       "for!", OCFS_I(inode)->ip_blkno);
+		iput(inode);
+		kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
 	}
-	return;
 }
 
 #define OCFS_DEFAULT_COMMIT_INTERVAL 	(HZ * 5)
@@ -720,8 +617,6 @@
 	if (!osb)
 		BUG();
 
-	spin_lock_init(&(osb->journal->cmt_lock));
-
 	/* already have the inode for our journal */
 	inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, osb->node_num);
 	if (inode == NULL) {
@@ -791,13 +686,9 @@
 	osb->journal->version = OCFS_JOURNAL_CURRENT_VERSION;
 	osb->journal->lockbh = bh;
 	atomic_set(&(osb->journal->num_trans), 0);
-	atomic_set(&(osb->journal->num_cmt_locks), 0);
-	atomic_set(&(osb->journal->num_chkpt_locks), 0);
 	init_rwsem(&(osb->journal->trans_barrier));
 	osb->journal->state = OCFS_JOURNAL_LOADED;
 	osb->journal->trans_id = (unsigned long) 1;
-	INIT_LIST_HEAD(&(osb->journal->committing_inodes));
-	INIT_LIST_HEAD(&(osb->journal->checkpointing_locks));
 
 	*dirty = (fe->id1.journal1.i_flags & OCFS2_JOURNAL_DIRTY_FL);
 	status = 0;
@@ -1518,15 +1409,12 @@
 			/* we can trust num_trans here because we're
 			 * in shutdown and nobody other than ourselves
 			 * should be able to start more. */
-			if ((atomic_read(&journal->num_trans) == 0)
-			    && (atomic_read(&journal->num_cmt_locks) == 0))
+			if (atomic_read(&journal->num_trans) == 0)
 				break;
 #ifdef VERBOSE_COMMIT_THREAD
-			printk("(%u) commit_thread: %u transactions, %u locks"
-			       "pending on shutdown\n", 
-			       current->pid, 
-			       atomic_read(&journal->num_trans),
-			       atomic_read(&journal->num_cmt_locks));
+			printk("(%u) commit_thread: %u transactions pending "
+			       "on shutdown\n", 
+			       current->pid, atomic_read(&journal->num_trans));
 #endif
 			goto skip_sleep;
 		}
@@ -1535,4 +1423,3 @@
 	complete (&(commit->c_complete));
 	return 0;
 }
-

Modified: branches/dlm-glue/src/ocfs.h
===================================================================
--- branches/dlm-glue/src/ocfs.h	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/ocfs.h	2004-10-29 22:05:22 UTC (rev 1605)
@@ -211,6 +211,7 @@
 	unsigned int            l_ro_holders;
 	unsigned int            l_ex_holders;
 	dlm_lockstatus          l_lksb;
+	unsigned long           l_last_trans; /* protected by trans_inc_lock */
 	u32                     l_local_seq;
 
 	/* used from AST/BAST funcs. */
@@ -293,14 +294,6 @@
 
 	__u32 		  ip_dir_start_lookup;
 
-	/* ip_pending_locks and ip_j_inode are protected by the
-	 * journals cmt_lock.  
-	 * ip_pending_locks: disk locks for this inode which have to be
-	 * released once  their transaction checkpoints
- 	 * ip_j_inode: list_head for journal->committing_inodes. */
-	struct list_head  ip_pending_locks;
-	struct list_head  ip_j_inode;
-
 	/* protected by trans_inc_lock, which transaction were we
 	 * created on? Zero if none. */
 	unsigned long               ip_created_trans;
@@ -425,7 +418,6 @@
 	int s_sectsize_bits;
 	int s_clustersize;
 	int s_clustersize_bits;
-	int needs_flush;
 	struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */
 
 	ocfs_alloc_bm cluster_bitmap;

Modified: branches/dlm-glue/src/ocfs_journal.h
===================================================================
--- branches/dlm-glue/src/ocfs_journal.h	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/ocfs_journal.h	2004-10-29 22:05:22 UTC (rev 1605)
@@ -67,54 +67,53 @@
 	unsigned long             trans_id;
 	/* locking order: trans_lock -> cmt_lock */
 	spinlock_t                cmt_lock;   /* protects the committed list */
-	atomic_t                  num_cmt_locks; /* number of delayed
-						  * locks */
-	atomic_t                  num_chkpt_locks;
 	struct rw_semaphore       trans_barrier;
-
-	struct list_head          committing_inodes;   /* list of all
-						        * inodes that
-						        * have committed
-						        * and are
-						        * awaiting a
-						        * checkpoint. Protected
-						        * by cmt_lock. */
-	struct list_head          checkpointing_locks; /* locks
-							* pending release
-							* after a checkpoint
-							* -- this variable
-							* is unlocked as
-							* commit_thread is
-							* the only guy who
-							* looks at it! */
 };
 
 extern spinlock_t trans_inc_lock;
 
 /* wrap trans_id so we never have it equal to zero. */
-static inline void ocfs_inc_trans_id(ocfs_journal *j)
+static inline unsigned long ocfs_inc_trans_id(ocfs_journal *j)
 {
+	unsigned long old_id;
 	spin_lock(&trans_inc_lock);
-	j->trans_id++;
+	old_id = j->trans_id++;
 	if (!j->trans_id)
 		j->trans_id = 1;
 	spin_unlock(&trans_inc_lock);
+	return old_id;
 }
 
-static inline int ocfs_trans_checkpointed(ocfs_journal *j, 
-					  unsigned long trans_id)
+static inline void ocfs_set_inode_lock_trans(ocfs_journal *journal,
+					     struct inode *inode)
 {
+	ocfs2_lock *lock = &OCFS_I(inode)->ip_lockres.lr_meta;
+
+	spin_lock(&trans_inc_lock);
+	lock->l_last_trans = journal->trans_id;
+	spin_unlock(&trans_inc_lock);
+}
+
+/* Used to figure out whether it's safe to drop a metadata lock on an
+ * inode. Returns true if all the inode's changes have been
+ * checkpointed to disk. You should be holding the spinlock on the
+ * metadata lock while calling this to be sure that nobody can take
+ * the lock and put it on another transaction. */
+static inline int ocfs_inode_fully_checkpointed(struct inode *inode)
+{
 	int ret;
+	ocfs2_lock *lock = &OCFS_I(inode)->ip_lockres.lr_meta;
+	ocfs_journal *journal = OCFS2_SB(inode->i_sb)->journal;
+
 	spin_lock(&trans_inc_lock);
-	ret = time_after(trans_id, j->trans_id);
+	ret = time_after(journal->trans_id, lock->l_last_trans);
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
 
-/* convenience function to check if an inode has been checkpointed
- * yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
- * set created_trans = 0 when you've been checkpointed. 
- * returns '1' if the inode hasn't been checkpointed yet. */
+/* convenience function to check if an inode is still new (has never
+ * hit disk). Will do you a favor and set created_trans = 0 when you've
+ * been checkpointed.  returns '1' if the inode is still new. */
 static inline int ocfs_inode_is_new(struct inode *inode)
 {
 	int ret;
@@ -144,11 +143,6 @@
 typedef struct _ocfs_journal_lock ocfs_journal_lock;
 struct _ocfs_journal_lock {
 	struct inode     *jl_inode;
-	/* lock_list: we are either on 
-	 *  - handle->locks: if still running
-	 *  - inode->ip_pending_locks: if waiting for checkpoint
-	 *  - journal->checkpointing_locks: awaiting release after checkpoint
-	 */
 	struct list_head  jl_lock_list;
 };
 

Modified: branches/dlm-glue/src/proc.c
===================================================================
--- branches/dlm-glue/src/proc.c	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/proc.c	2004-10-29 22:05:22 UTC (rev 1605)
@@ -427,18 +427,14 @@
   "Volume size              : %llu\n"		\
   "Failed Large Allocs      : %u\n"		\
   "Retry Large Allocs       : %u\n"		\
-  "Open Transactions:       : %u\n"		\
-  "Delayed Locks            : %u\n"		\
-  "Checkpointing Locks      : %u\n"
+  "Open Transactions:       : %u\n"		
 
 	len = sprintf (page, PROC_STATS, pubmap,
 		       osb->num_nodes, osb->s_clustersize, 
 		       ocfs2_clusters_to_bytes(osb->sb, osb->cluster_bitmap.validbits),
 		       osb->cluster_bitmap.failed, 
 		       osb->cluster_bitmap.ok_retries, 
-		       atomic_read(&osb->journal->num_trans),
-		       atomic_read(&osb->journal->num_cmt_locks),
-		       atomic_read(&osb->journal->num_chkpt_locks));
+		       atomic_read(&osb->journal->num_trans));
 
 	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
 

Modified: branches/dlm-glue/src/super.c
===================================================================
--- branches/dlm-glue/src/super.c	2004-10-29 01:33:20 UTC (rev 1604)
+++ branches/dlm-glue/src/super.c	2004-10-29 22:05:22 UTC (rev 1605)
@@ -1094,7 +1094,6 @@
 	init_MUTEX (&(osb->recovery_lock));
 	init_MUTEX (&(osb->orphan_recovery_lock));
 
-	osb->needs_flush = 0;
 	osb->disable_recovery = 0;
 
 	atomic_set (&osb->num_recovery_threads, 0);