[Ocfs2-commits] rev 745 - in trunk/src: . inc
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue Mar 2 15:38:49 CST 2004
Author: mfasheh
Date: 2004-03-02 15:38:47 -0600 (Tue, 02 Mar 2004)
New Revision: 745
Modified:
trunk/src/alloc.c
trunk/src/dir.c
trunk/src/inc/journal.h
trunk/src/journal.c
trunk/src/namei.c
Log:
* Fix a potential bug where when journalling a new buffer (hasn't been
read off disk), the copyout data is written back to disk during
abort. This could put random bytes onto disk. Instead, we simply
journal_forget those buffers now if they're not already part of any
other transaction. We now actually make use of the
OCFS_JOURNAL_ACCESS_CREATE flag as our hint to ocfs_journal_access
that the block has been newly allocated and may not contain valid data
yet.
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/alloc.c 2004-03-02 21:38:47 UTC (rev 745)
@@ -695,8 +695,7 @@
for(i = 0; i < numSectorsAlloc; i++) {
if (handle) {
status = ocfs_journal_access(handle, header_bhs[i],
- OCFS_JOURNAL_ACCESS_WRITE);
-
+ OCFS_JOURNAL_ACCESS_CREATE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -704,8 +703,6 @@
}
buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]); /* write */
memset(buff, 0, osb->sect_size);
-
- /* TODO: Do we really need to do this? */
set_buffer_uptodate(header_bhs[i]);
OCFS_BH_PUT_DATA(header_bhs[i]);
}
Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c 2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/dir.c 2004-03-02 21:38:47 UTC (rev 745)
@@ -833,7 +833,7 @@
set_buffer_uptodate(bhs[freeOffset+1]);
status = ocfs_journal_access(handle, bhs[freeOffset+1],
- OCFS_JOURNAL_ACCESS_WRITE);
+ OCFS_JOURNAL_ACCESS_CREATE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1276,7 +1276,7 @@
set_buffer_uptodate(newbhs[i]);
/* clear all 128k, all garbage currently */
status = ocfs_journal_access(handle, newbhs[i],
- OCFS_JOURNAL_ACCESS_WRITE);
+ OCFS_JOURNAL_ACCESS_CREATE);
if (status < 0) {
while (i >= 0)
brelse(newbhs[i--]);
Modified: trunk/src/inc/journal.h
===================================================================
--- trunk/src/inc/journal.h 2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/inc/journal.h 2004-03-02 21:38:47 UTC (rev 745)
@@ -104,6 +104,8 @@
typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
struct _ocfs_journal_copyout {
+ __u8 forget; /* should we journal_forget this
+ * buffer instead? */
unsigned long blocknr; /* what block is this for? */
char *data; /* the actual data */
};
@@ -264,8 +266,10 @@
void ocfs_abort_trans(ocfs_journal_handle *handle);
/*
* Create access is for when we get a newly created buffer and we're
- * not gonna read it off disk, but rather fill it ourselves. The
- * buffer should already be locked.
+ * not gonna read it off disk, but rather fill it ourselves. If it's
+ * not part of an already commiting transaction, we'll mark it to be
+ * passed to journal_forget in case of abort. Otherwise, it's treated
+ * just like a OCFS_JOURNAL_ACCESS_WRITE buffer.
*
* Write access is for when we read a block off disk and are going to
* modify it. This way the journalling layer knows it may need to make
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/journal.c 2004-03-02 21:38:47 UTC (rev 745)
@@ -483,9 +483,27 @@
if (co == NULL)
BUG();
- LOG_TRACE_ARGS("Aborting block %lu\n", co->blocknr);
+
+ LOG_TRACE_ARGS("Aborting block %lu, forget=%u\n", co->blocknr,
+ co->forget);
data = OCFS_BH_GET_DATA_WRITE(bh);
- memcpy(data, co->data, bh->b_size);
+ if (co->forget) {
+ /* journal_forget will perform a brelse for us */
+ get_bh(bh);
+ journal_forget(handle->k_handle, bh);
+
+ /* this buffer has changed data which we want to
+ * consider invalid -- mark the sequence number as
+ * old. */
+ CLEAR_BH_SEQNUM(bh);
+
+ lock_buffer(bh);
+ clear_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ unlock_buffer(bh);
+ } else {
+ memcpy(data, co->data, bh->b_size);
+ }
OCFS_BH_PUT_DATA(bh);
}
@@ -517,15 +535,14 @@
* transactions in the journal so that we know that disk
* reflects the latest correct blocks. After that, we just
* repopulate the buffers from disk. */
-
- /* journal flush here */
journal_lock_updates(journal->k_journal);
retval = journal_flush(journal->k_journal);
journal_unlock_updates(journal->k_journal);
if (retval < 0)
LOG_ERROR_STATUS(retval);
- /* reread buffers here and then brelse them */
+ /* If we ever worry about abort performance, I'm 90% sure this
+ * read is not necessary. */
if (handle->num_buffs != 0)
retval = ocfs_read_bhs(osb,
handle->buffs[0]->b_blocknr * 512,
@@ -603,24 +620,39 @@
if (!found) {
i = handle->num_co;
+ handle->co_buffs[i].blocknr = bh->b_blocknr;
+ handle->num_co++;
- LOG_TRACE_ARGS("Copying buffer out to position %d\n", i);
- /* This malloc should just be a slab. */
- handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
- if (handle->co_buffs[i].data == NULL) {
- status = -ENOMEM;
- goto done;
+ /* The buffer has no copy out, we have two choices below.
+ *
+ * If we haven't read the buffer off disk (like in a
+ * create where we're going to completely fill in the
+ * bh anyway), and we know that the buffer isn't in
+ * JBD for another transaction, then we can just
+ * perform a journal_forget at abort time instead of
+ * filling it with whatever junk was in memory.
+ *
+ * Otherwise, we make a copy of the data in the buffer. */
+ if (!buffer_jbd(bh) && type == OCFS_JOURNAL_ACCESS_CREATE) {
+ LOG_TRACE_ARGS("Making block (%lu) a forget block at "
+ "position %d\n", bh->b_blocknr, i);
+ handle->co_buffs[i].data = NULL;
+ handle->co_buffs[i].forget = 1;
+ } else {
+ LOG_TRACE_ARGS("Copying block (%lu) out to position"
+ "%d\n", bh->b_blocknr, i);
+ /* This malloc should just be a slab. */
+ handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
+ if (handle->co_buffs[i].data == NULL) {
+ status = -ENOMEM;
+ goto done;
+ }
+ memcpy(handle->co_buffs[i].data, data, bh->b_size);
}
- memcpy(handle->co_buffs[i].data, data, bh->b_size);
- handle->co_buffs[i].blocknr = bh->b_blocknr;
- handle->num_co++;
}
switch (type) {
case OCFS_JOURNAL_ACCESS_CREATE:
- status = journal_get_create_access(handle->k_handle, bh);
- break;
-
case OCFS_JOURNAL_ACCESS_WRITE:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
status = journal_get_write_access(handle->k_handle, bh, NULL);
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-03-02 01:28:01 UTC (rev 744)
+++ trunk/src/namei.c 2004-03-02 21:38:47 UTC (rev 745)
@@ -450,7 +450,7 @@
set_buffer_uptodate(dirbhs[i]);
status = ocfs_journal_access(handle, dirbhs[i],
- OCFS_JOURNAL_ACCESS_WRITE);
+ OCFS_JOURNAL_ACCESS_CREATE);
if (status < 0) {
while (i >= 0)
brelse(dirbhs[i--]);
More information about the Ocfs2-commits
mailing list