[Ocfs2-commits] rev 745 - in trunk/src: . inc

Tue Mar 2 15:38:49 CST 2004

Author: mfasheh
Date: 2004-03-02 15:38:47 -0600 (Tue, 02 Mar 2004)
New Revision: 745

Modified:
   trunk/src/alloc.c
   trunk/src/dir.c
   trunk/src/inc/journal.h
   trunk/src/journal.c
   trunk/src/namei.c
Log:
* Fix a potential bug where, when journalling a new buffer (one that
  hasn't been read off disk), the copyout data is written back to disk
  during abort. This could put random bytes onto disk. Instead, we
  simply journal_forget those buffers if they're not already part of
  any other transaction. We now actually make use of the
  OCFS_JOURNAL_ACCESS_CREATE flag as our hint to ocfs_journal_access
  that the block has been newly allocated and may not contain valid data
  yet.



Modified: trunk/src/alloc.c
===================================================================

--- trunk/src/alloc.c	2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/alloc.c	2004-03-02 21:38:47 UTC (rev 745)
@@ -695,8 +695,7 @@
 	for(i = 0; i < numSectorsAlloc; i++) {
 		if (handle) {
 			status = ocfs_journal_access(handle, header_bhs[i], 
-						    OCFS_JOURNAL_ACCESS_WRITE);
-
+						   OCFS_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
 				goto finally;
@@ -704,8 +703,6 @@
 		}
 		buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);  /* write */
 		memset(buff, 0, osb->sect_size);
-
-		/* TODO: Do we really need to do this? */
 		set_buffer_uptodate(header_bhs[i]);
 		OCFS_BH_PUT_DATA(header_bhs[i]);
 	}

Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c	2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/dir.c	2004-03-02 21:38:47 UTC (rev 745)
@@ -833,7 +833,7 @@
 	
 	set_buffer_uptodate(bhs[freeOffset+1]);
 	status = ocfs_journal_access(handle, bhs[freeOffset+1], 
-				     OCFS_JOURNAL_ACCESS_WRITE);
+				     OCFS_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto bail;
@@ -1276,7 +1276,7 @@
 				set_buffer_uptodate(newbhs[i]);
 				/* clear all 128k, all garbage currently */
 				status = ocfs_journal_access(handle, newbhs[i],
-						    OCFS_JOURNAL_ACCESS_WRITE);
+						   OCFS_JOURNAL_ACCESS_CREATE);
 				if (status < 0) {
 					while (i >= 0)
 						brelse(newbhs[i--]);

Modified: trunk/src/inc/journal.h
===================================================================
--- trunk/src/inc/journal.h	2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/inc/journal.h	2004-03-02 21:38:47 UTC (rev 745)
@@ -104,6 +104,8 @@
 
 typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
 struct _ocfs_journal_copyout {
+	__u8                forget;  /* should we journal_forget this
+				      * buffer instead? */
 	unsigned long       blocknr; /* what block is this for? */
 	char                *data;   /* the actual data */
 };
@@ -264,8 +266,10 @@
 void                 ocfs_abort_trans(ocfs_journal_handle *handle);
 /*
  * Create access is for when we get a newly created buffer and we're
- * not gonna read it off disk, but rather fill it ourselves. The
- * buffer should already be locked.
+ * not gonna read it off disk, but rather fill it ourselves. If it's
+ * not part of an already committing transaction, we'll mark it to be
+ * passed to journal_forget in case of abort. Otherwise, it's treated 
+ * just like a OCFS_JOURNAL_ACCESS_WRITE buffer.
  *
  * Write access is for when we read a block off disk and are going to
  * modify it. This way the journalling layer knows it may need to make

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/journal.c	2004-03-02 21:38:47 UTC (rev 745)
@@ -483,9 +483,27 @@
 
 		if (co == NULL)
 			BUG();
-		LOG_TRACE_ARGS("Aborting block %lu\n", co->blocknr);
+
+		LOG_TRACE_ARGS("Aborting block %lu, forget=%u\n", co->blocknr,
+			       co->forget);
 		data = OCFS_BH_GET_DATA_WRITE(bh);
-		memcpy(data, co->data, bh->b_size);
+		if (co->forget) {
+			/* journal_forget will perform a brelse for us */
+			get_bh(bh);
+			journal_forget(handle->k_handle, bh);
+
+			/* this buffer has changed data which we want to
+	                 * consider invalid -- mark the sequence number as
+	                 * old. */
+			CLEAR_BH_SEQNUM(bh);
+
+			lock_buffer(bh);
+			clear_buffer_uptodate(bh);
+			clear_buffer_dirty(bh);
+			unlock_buffer(bh);
+		} else {
+			memcpy(data, co->data, bh->b_size);
+		}
 		OCFS_BH_PUT_DATA(bh);
 	}
 
@@ -517,15 +535,14 @@
 	 * transactions in the journal so that we know that disk
 	 * reflects the latest correct blocks. After that, we just
 	 * repopulate the buffers from disk. */
-
-	/* journal flush here */
 	journal_lock_updates(journal->k_journal);
 	retval = journal_flush(journal->k_journal);
 	journal_unlock_updates(journal->k_journal);
 	if (retval < 0)
 		LOG_ERROR_STATUS(retval);
 
-	/* reread buffers here and then brelse them */
+	/* If we ever worry about abort performance, I'm 90% sure this
+	 * read is not necessary. */
 	if (handle->num_buffs != 0)
 		retval = ocfs_read_bhs(osb, 
 				       handle->buffs[0]->b_blocknr * 512,
@@ -603,24 +620,39 @@
 
 	if (!found) {
 		i = handle->num_co;
+		handle->co_buffs[i].blocknr = bh->b_blocknr;
+		handle->num_co++;
 
-		LOG_TRACE_ARGS("Copying buffer out to position %d\n", i);
-		/* This malloc should just be a slab. */
-		handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
-		if (handle->co_buffs[i].data == NULL) {
-			status = -ENOMEM;
-			goto done;
+		/* The buffer has no copy out, we have two choices below.
+		 *
+		 * If we haven't read the buffer off disk (like in a
+		 * create where we're going to completely fill in the
+		 * bh anyway), and we know that the buffer isn't in
+		 * JBD for another transaction, then we can just
+		 * perform a journal_forget at abort time instead of
+		 * filling it with whatever junk was in memory.
+		 *
+		 * Otherwise, we make a copy of the data in the buffer. */
+		if (!buffer_jbd(bh) && type == OCFS_JOURNAL_ACCESS_CREATE) {
+			LOG_TRACE_ARGS("Making block (%lu) a forget block at "
+				       "position %d\n", bh->b_blocknr, i);
+			handle->co_buffs[i].data = NULL;
+			handle->co_buffs[i].forget = 1;
+		} else {
+			LOG_TRACE_ARGS("Copying block (%lu) out to position"
+				       "%d\n", bh->b_blocknr, i);
+			/* This malloc should just be a slab. */
+			handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
+			if (handle->co_buffs[i].data == NULL) {
+				status = -ENOMEM;
+				goto done;
+			}
+			memcpy(handle->co_buffs[i].data, data, bh->b_size);
 		}
-		memcpy(handle->co_buffs[i].data, data, bh->b_size);
-		handle->co_buffs[i].blocknr = bh->b_blocknr;
-		handle->num_co++;
 	}
 
 	switch (type) {
 	case OCFS_JOURNAL_ACCESS_CREATE:
-		status = journal_get_create_access(handle->k_handle, bh);
-		break;
-
 	case OCFS_JOURNAL_ACCESS_WRITE:
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 		status = journal_get_write_access(handle->k_handle, bh, NULL);

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/namei.c	2004-03-02 21:38:47 UTC (rev 745)
@@ -450,7 +450,7 @@
 
 			set_buffer_uptodate(dirbhs[i]);
 			status = ocfs_journal_access(handle, dirbhs[i], 
-					     OCFS_JOURNAL_ACCESS_WRITE);
+					     OCFS_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				while (i >= 0)
 					brelse(dirbhs[i--]);