[Ocfs2-commits] bryce commits r1789 - branches/endian/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue Jan 18 15:41:48 CST 2005
Author: bryce
Date: 2005-01-18 15:41:46 -0600 (Tue, 18 Jan 2005)
New Revision: 1789
Modified:
branches/endian/src/24io.c
branches/endian/src/alloc.c
branches/endian/src/alloc.h
branches/endian/src/aops.c
branches/endian/src/buffer_head_io.c
branches/endian/src/buffer_head_io.h
branches/endian/src/dcache.c
branches/endian/src/dir.c
branches/endian/src/extent_map.c
branches/endian/src/file.c
branches/endian/src/file.h
branches/endian/src/heartbeat.c
branches/endian/src/heartbeat.h
branches/endian/src/inode.c
branches/endian/src/inode.h
branches/endian/src/journal.c
branches/endian/src/localalloc.c
branches/endian/src/namei.c
branches/endian/src/namei.h
branches/endian/src/ocfs.h
branches/endian/src/ocfs1_fs_compat.h
branches/endian/src/ocfs2.h
branches/endian/src/ocfs2_fs.h
branches/endian/src/ocfs_compat.h
branches/endian/src/ocfs_journal.h
branches/endian/src/ocfs_log.h
branches/endian/src/proc.c
branches/endian/src/suballoc.c
branches/endian/src/suballoc.h
branches/endian/src/super.c
branches/endian/src/symlink.c
branches/endian/src/symlink.h
branches/endian/src/sysfile.c
branches/endian/src/sysfile.h
branches/endian/src/util.c
branches/endian/src/util.h
branches/endian/src/vote.c
branches/endian/src/vote.h
Log:
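Experimental endianness changes for ocfs2 on PPC64: on-disk fields are accessed through le*_to_cpu()/cpu_to_le*() conversions and renamed with a bryce_ prefix.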
Modified: branches/endian/src/24io.c
===================================================================
--- branches/endian/src/24io.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/24io.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -143,10 +143,10 @@
}
if (rw == READ) {
- if (i_size_read(inode) <= *offp) /* read past end of file */
+ if (inode->i_size <= *offp) /* read past end of file */
return 0;
- if (size > (i_size_read(inode) - *offp))
- size = i_size_read(inode) - *offp;
+ if (size > (inode->i_size - *offp))
+ size = inode->i_size - *offp;
}
/* make sure aligned to either PAGE_SIZE or sect_size IO */
@@ -370,20 +370,20 @@
unmap_kvec(vec, rw == READ);
free_kvec(vec);
- if (res > 0)
- iocb->nr_transferred += res;
+ if (res <= 0)
+ ocfs_aio_complete(iocb, res);
- /* we only issue a continuation if this part suceeded
- * and there is more remaining */
- if (!iocb->ctx->dead && res == iocb->this_size &&
- iocb->nr_transferred < iocb->size) {
- iocb->this_size = rw;
- INIT_TQUEUE(&iocb->u.tq, ocfs_aio_issue, iocb);
- schedule_task(&iocb->u.tq);
+ iocb->nr_transferred += res;
+
+ if ((res != iocb->this_size) || iocb->ctx->dead ||
+ (iocb->nr_transferred == iocb->size)) {
+ ocfs_aio_complete(iocb, 0);
return;
}
- ocfs_aio_complete(iocb, res);
+ iocb->this_size = rw;
+ INIT_TQUEUE(&iocb->u.tq, ocfs_aio_issue, iocb);
+ schedule_task(&iocb->u.tq);
}
static void ocfs_aio_complete_read(void *_iocb, struct kvec *vec, ssize_t res)
@@ -472,7 +472,7 @@
int sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
int sector_mask = (1 << sector_size) - 1;
- if (!size || (pos == i_size_read(inode))) {
+ if (!size || (pos == inode->i_size)) {
ocfs_aio_complete(req, 0);
return 0;
}
@@ -480,7 +480,7 @@
if (aio_max_size < sector_size)
return -EINVAL;
- if (pos >= i_size_read(inode))
+ if (pos >= inode->i_size)
return -ENXIO;
if ((pos < 0) || (pos & sector_mask) || (size & sector_mask))
Modified: branches/endian/src/alloc.c
===================================================================
--- branches/endian/src/alloc.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/alloc.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -33,9 +33,10 @@
#include "ocfs_log.h"
#include "ocfs.h"
#include "ocfs2.h"
+#include "ocfs2_fs.h"
#include "alloc.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
#include "inode.h"
#include "localalloc.h"
@@ -104,9 +105,9 @@
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
u64 blkno)
{
- return blkno == (ext->e_blkno +
+ return blkno == (le64_to_cpu(ext->bryce_e_blkno) +
ocfs2_clusters_to_blocks(inode->i_sb,
- ext->e_clusters));
+ le32_to_cpu(ext->bryce_e_clusters)));
}
/*
@@ -125,8 +126,8 @@
OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
- if (fe->i_last_eb_blk) {
- retval = ocfs_read_block(osb, fe->i_last_eb_blk, &eb_bh,
+ if (le64_to_cpu(fe->bryce_i_last_eb_blk)) {
+ retval = ocfs_read_block(osb, le64_to_cpu(fe->bryce_i_last_eb_blk), &eb_bh,
OCFS_BH_CACHED, inode);
if (retval < 0) {
LOG_ERROR_STATUS(retval);
@@ -137,15 +138,15 @@
} else
el = &(fe->id2.i_list);
- OCFS_ASSERT(el->l_tree_depth == 0);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_tree_depth) == 0);
- retval = el->l_count - el->l_next_free_rec;
+ retval = le16_to_cpu(el->bryce_l_count) - le16_to_cpu(el->bryce_l_next_free_rec);
bail:
if (eb_bh)
brelse(eb_bh);
LOG_EXIT_STATUS(retval);
- return retval;
+ return(retval);
}
/* expects array to already be malloced
@@ -203,17 +204,16 @@
eb = (ocfs2_extent_block *) bhs[i]->b_data;
/* Ok, setup the minimal stuff here. */
strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
- eb->h_blkno = cpu_to_le64(first_blkno);
- eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
+ eb->bryce_h_blkno = cpu_to_le64(first_blkno);
#ifndef OCFS_USE_ALL_METADATA_SUBALLOCATORS
/* we always use node zeros suballocator */
- eb->h_suballoc_node = 0;
+ eb->bryce_h_suballoc_node = cpu_to_le16(0);
#else
- eb->h_suballoc_node = cpu_to_le16(osb->slot_num);
+ eb->h_suballoc_node = osb->node_num;
#endif
- eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
- eb->h_list.l_count = ocfs2_extent_recs_per_eb(osb->sb);
+ eb->bryce_h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+ eb->h_list.bryce_l_count = cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
suballoc_bit_start++;
first_blkno++;
@@ -240,7 +240,7 @@
}
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -286,9 +286,9 @@
el = &fe->id2.i_list;
/* we never add a branch to a leaf. */
- OCFS_ASSERT(el->l_tree_depth);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_tree_depth));
- new_blocks = el->l_tree_depth;
+ new_blocks = le16_to_cpu(el->bryce_l_tree_depth);
/* allocate the number of new eb blocks we need */
size = sizeof(struct buffer_head *) * new_blocks;
@@ -328,14 +328,14 @@
goto bail;
}
- eb->h_next_leaf_blk = 0;
- eb_el->l_tree_depth = i;
- eb_el->l_next_free_rec = 1;
- eb_el->l_recs[0].e_cpos = fe->i_clusters;
- eb_el->l_recs[0].e_blkno = next_blkno;
- eb_el->l_recs[0].e_clusters = 0;
- if (!eb_el->l_tree_depth)
- new_last_eb_blk = le64_to_cpu(eb->h_blkno);
+ eb->bryce_h_next_leaf_blk = cpu_to_le64(0);
+ eb_el->bryce_l_tree_depth = cpu_to_le16(i);
+ eb_el->bryce_l_next_free_rec = cpu_to_le16(1);
+ eb_el->l_recs[0].bryce_e_cpos = fe->bryce_i_clusters; /* no need to convert? */
+ eb_el->l_recs[0].bryce_e_blkno = cpu_to_le64(next_blkno);
+ eb_el->l_recs[0].bryce_e_clusters = cpu_to_le32(0);
+ if (!le16_to_cpu(eb_el->bryce_l_tree_depth))
+ new_last_eb_blk = le64_to_cpu(eb->bryce_h_blkno);
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
@@ -343,7 +343,7 @@
goto bail;
}
- next_blkno = le64_to_cpu(eb->h_blkno);
+ next_blkno = le64_to_cpu(eb->bryce_h_blkno);
}
/* This is a bit hairy. We want to update up to three blocks
@@ -375,18 +375,18 @@
/* Link the new branch into the rest of the tree (el will
* either be on the fe, or the extent block passed in. */
- i = el->l_next_free_rec;
- el->l_recs[i].e_blkno = next_blkno;
- el->l_recs[i].e_cpos = fe->i_clusters;
- el->l_recs[i].e_clusters = 0;
- el->l_next_free_rec++;
+ i = le16_to_cpu(el->bryce_l_next_free_rec);
+ el->l_recs[i].bryce_e_blkno = cpu_to_le64(next_blkno);
+ el->l_recs[i].bryce_e_cpos = fe->bryce_i_clusters;
+ el->l_recs[i].bryce_e_clusters = cpu_to_le32(0);
+ el->bryce_l_next_free_rec = cpu_to_le16(le16_to_cpu(el->bryce_l_next_free_rec) + 1 );
/* fe needs a new last extent block pointer, as does the
* next_leaf on the previously last-extent-block. */
- fe->i_last_eb_blk = new_last_eb_blk;
+ fe->bryce_i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
eb = (ocfs2_extent_block *) last_eb_bh->b_data;
- eb->h_next_leaf_blk = new_last_eb_blk;
+ eb->bryce_h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
status = ocfs_journal_dirty(handle, last_eb_bh);
if (status < 0)
@@ -457,12 +457,12 @@
}
/* copy the fe data into the new extent block */
- eb_el->l_tree_depth = fe_el->l_tree_depth;
- eb_el->l_next_free_rec = fe_el->l_next_free_rec;
- for(i = 0; i < fe_el->l_next_free_rec; i++) {
- eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
- eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
- eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
+ eb_el->bryce_l_tree_depth = fe_el->bryce_l_tree_depth;
+ eb_el->bryce_l_next_free_rec = fe_el->bryce_l_next_free_rec;
+ for(i = 0; i < le16_to_cpu(fe_el->bryce_l_next_free_rec); i++) {
+ eb_el->l_recs[i].bryce_e_cpos = fe_el->l_recs[i].bryce_e_cpos;
+ eb_el->l_recs[i].bryce_e_clusters = fe_el->l_recs[i].bryce_e_clusters;
+ eb_el->l_recs[i].bryce_e_blkno = fe_el->l_recs[i].bryce_e_blkno;
}
status = ocfs_journal_dirty(handle, new_eb_bh);
@@ -479,21 +479,21 @@
}
/* update fe now */
- fe_el->l_tree_depth++;
- fe_el->l_recs[0].e_cpos = 0;
- fe_el->l_recs[0].e_blkno = eb->h_blkno;
- fe_el->l_recs[0].e_clusters = fe->i_clusters;
- for(i = 1; i < fe_el->l_next_free_rec; i++) {
- fe_el->l_recs[i].e_cpos = 0;
- fe_el->l_recs[i].e_clusters = 0;
- fe_el->l_recs[i].e_blkno = 0;
+ fe_el->bryce_l_tree_depth = cpu_to_le16(le16_to_cpu(fe_el->bryce_l_tree_depth) + 1);
+ fe_el->l_recs[0].bryce_e_cpos = cpu_to_le32(0);
+ fe_el->l_recs[0].bryce_e_blkno = eb->bryce_h_blkno;
+ fe_el->l_recs[0].bryce_e_clusters = fe->bryce_i_clusters;
+ for(i = 1; i < le16_to_cpu(fe_el->bryce_l_next_free_rec); i++) {
+ fe_el->l_recs[i].bryce_e_cpos = cpu_to_le32(0);
+ fe_el->l_recs[i].bryce_e_clusters = cpu_to_le32(0);
+ fe_el->l_recs[i].bryce_e_blkno = cpu_to_le64(0);
}
- fe_el->l_next_free_rec = 1;
+ fe_el->bryce_l_next_free_rec = cpu_to_le16(1);
/* If this is our 1st tree depth shift, then last_eb_blk
* becomes the allocated extent block */
- if (fe_el->l_tree_depth == 1)
- fe->i_last_eb_blk = eb->h_blkno;
+ if (le16_to_cpu(fe_el->bryce_l_tree_depth) == 1)
+ fe->bryce_i_last_eb_blk = eb->bryce_h_blkno;
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
@@ -527,6 +527,7 @@
u32 new_clusters)
{
int status, i, num_bhs = 0;
+ int temp_i;
u64 next_blkno;
struct buffer_head **eb_bhs = NULL;
ocfs2_dinode *fe;
@@ -544,14 +545,14 @@
fe = (ocfs2_dinode *) fe_bh->b_data;
el = &fe->id2.i_list;
- if (el->l_tree_depth) {
+ if (le16_to_cpu(el->bryce_l_tree_depth)) {
/* This is another operation where we want to be
* careful about our tree updates. An error here means
* none of the previous changes we made should roll
* forward. As a result, we have to record the buffers
* for this part of the tree in an array and reserve a
* journal write to them before making any changes. */
- num_bhs = fe->id2.i_list.l_tree_depth;
+ num_bhs = le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth);
eb_bhs = kmalloc(sizeof(struct buffer_head *) * num_bhs,
GFP_KERNEL);
if (!eb_bhs) {
@@ -562,9 +563,9 @@
memset(eb_bhs, 0, sizeof(struct buffer_head *) * num_bhs);
i = 0;
- while(el->l_tree_depth) {
- OCFS_ASSERT_RO(el->l_next_free_rec);
- next_blkno = el->l_recs[el->l_next_free_rec-1].e_blkno;
+ while(le16_to_cpu(el->bryce_l_tree_depth)) {
+ OCFS_ASSERT_RO(le16_to_cpu(el->bryce_l_next_free_rec));
+ next_blkno = le64_to_cpu(el->l_recs[le16_to_cpu(el->bryce_l_next_free_rec)-1].bryce_e_blkno);
OCFS_ASSERT(i < num_bhs);
status = ocfs_read_block(osb, next_blkno, &eb_bhs[i],
@@ -588,15 +589,21 @@
/* When we leave this loop, eb_bhs[num_bhs - 1] will
* hold the bottom-most leaf extent block. */
}
- OCFS_ASSERT(!el->l_tree_depth);
+ OCFS_ASSERT(!le16_to_cpu(el->bryce_l_tree_depth));
el = &fe->id2.i_list;
/* If we have tree depth, then the fe update is
* trivial, and we want to switch el out for the
* bottom-most leaf in order to update it with the
* actual extent data below. */
- OCFS_ASSERT_RO(el->l_next_free_rec);
- el->l_recs[el->l_next_free_rec - 1].e_clusters += new_clusters;
+ OCFS_ASSERT_RO(le16_to_cpu(el->bryce_l_next_free_rec));
+
+/* el->l_recs[el->bryce_l_next_free_rec - 1].bryce_e_clusters) += new_clusters; */
+
+ temp_i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
+ el->l_recs[temp_i].bryce_e_clusters = le32_to_cpu(el->l_recs[temp_i]) + new_clusters;
+
+
/* (num_bhs - 1) to avoid the leaf */
for(i = 0; i < (num_bhs - 1); i++) {
eb = (ocfs2_extent_block *) eb_bhs[i]->b_data;
@@ -604,9 +611,12 @@
/* finally, make our actual change to the
* intermediate extent blocks. */
- el->l_recs[el->l_next_free_rec - 1].e_clusters
- += new_clusters;
+/* el->l_recs[el->bryce_l_next_free_rec - 1].bryce_e_clusters += new_clusters; */
+
+ temp_i = el->bryce_l_next_free_rec - 1;
+ el->l_recs[temp_i] = le32_to_cpu(le32_to_cpu(el->l_recs[temp_i]) + new_clusters);
+
status = ocfs_journal_dirty(handle, eb_bhs[i]);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -616,31 +626,31 @@
* the loop above */
eb = (ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
el = &eb->h_list;
- OCFS_ASSERT(!el->l_tree_depth);
+ OCFS_ASSERT(!le16_to_cpu(el->bryce_l_tree_depth));
}
/* yay, we can finally add the actual extent now! */
- i = el->l_next_free_rec - 1;
- if (el->l_next_free_rec && ocfs_extent_contig(inode,
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
+ if (le16_to_cpu(el->bryce_l_next_free_rec) && ocfs_extent_contig(inode,
&el->l_recs[i],
start_blk)) {
- el->l_recs[i].e_clusters += new_clusters;
- } else if (el->l_next_free_rec && !el->l_recs[i].e_clusters) {
+ el->l_recs[i].bryce_e_clusters += new_clusters;
+ } else if (le16_to_cpu(el->bryce_l_next_free_rec) && !le32_to_cpu(el->l_recs[i].bryce_e_clusters)) {
/* having an empty extent at eof is legal. */
- OCFS_ASSERT_RO(el->l_recs[i].e_cpos == fe->i_clusters);
- el->l_recs[i].e_blkno = start_blk;
- el->l_recs[i].e_clusters = new_clusters;
+ OCFS_ASSERT_RO(le32_to_cpu(el->l_recs[i].bryce_e_cpos) == le32_to_cpu(fe->bryce_i_clusters));
+ el->l_recs[i].bryce_e_blkno = cpu_to_le64(start_blk);
+ el->l_recs[i].bryce_e_clusters = cpu_to_le32(new_clusters);
} else {
/* No contiguous record, or no empty record at eof, so
* we add a new one. */
- OCFS_ASSERT(el->l_next_free_rec < el->l_count);
- i = el->l_next_free_rec;
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec) < le16_to_cpu(el->bryce_l_count));
+ i = le16_to_cpu(el->bryce_l_next_free_rec);
- el->l_recs[i].e_blkno = start_blk;
- el->l_recs[i].e_clusters = new_clusters;
- el->l_recs[i].e_cpos = fe->i_clusters;
- el->l_next_free_rec++;
+ el->l_recs[i].bryce_e_blkno = cpu_to_le64(start_blk);
+ el->l_recs[i].bryce_e_clusters = cpu_to_le32(new_clusters);
+ el->l_recs[i].bryce_e_cpos = fe->bryce_i_clusters;
+ el->bryce_l_next_free_rec = cpu_to_le16(le16_to_cpu(el->bryce_l_next_free_rec + 1));
}
/*
@@ -651,13 +661,13 @@
new_clusters);
if (status) {
LOG_ERROR_STATUS(status);
- ocfs2_extent_map_drop(inode, fe->i_clusters);
+ ocfs2_extent_map_drop(inode, le32_to_cpu(fe->bryce_i_clusters));
}
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0)
LOG_ERROR_STATUS(status);
- if (fe->id2.i_list.l_tree_depth) {
+ if (le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth)) {
status = ocfs_journal_dirty(handle, eb_bhs[num_bhs - 1]);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -715,10 +725,10 @@
fe = (ocfs2_dinode *) fe_bh->b_data;
el = &fe->id2.i_list;
- while(el->l_tree_depth > 1) {
- OCFS_ASSERT_RO(el->l_next_free_rec);
- i = el->l_next_free_rec - 1;
- blkno = el->l_recs[i].e_blkno;
+ while(le16_to_cpu(el->bryce_l_tree_depth) > 1) {
+ OCFS_ASSERT_RO(le16_to_cpu(el->bryce_l_next_free_rec));
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
+ blkno = le64_to_cpu(el->l_recs[i].bryce_e_blkno);
OCFS_ASSERT_RO(blkno);
if (bh) {
@@ -737,7 +747,7 @@
OCFS_ASSERT_RO(IS_VALID_EXTENT_BLOCK(eb));
el = &eb->h_list;
- if (el->l_next_free_rec < el->l_count) {
+ if (le16_to_cpu(el->bryce_l_next_free_rec) < le16_to_cpu(el->bryce_l_count)) {
if (lowest_bh)
brelse(lowest_bh);
lowest_bh = bh;
@@ -748,7 +758,7 @@
/* If we didn't find one and the fe doesn't have any room,
* then return '1' */
if (!lowest_bh
- && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
+ && (le16_to_cpu(fe->id2.i_list.bryce_l_next_free_rec) == le16_to_cpu(fe->id2.i_list.bryce_l_count)))
status = 1;
*target_bh = lowest_bh;
@@ -785,9 +795,9 @@
fe = (ocfs2_dinode *) fe_bh->b_data;
el = &fe->id2.i_list;
- if (el->l_tree_depth) {
+ if (le16_to_cpu(el->bryce_l_tree_depth)) {
/* jump to end of tree */
- status = ocfs_read_block(osb, fe->i_last_eb_blk, &last_eb_bh,
+ status = ocfs_read_block(osb, le64_to_cpu(fe->bryce_i_last_eb_blk), &last_eb_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_EXIT_STATUS(status);
@@ -798,10 +808,10 @@
}
/* Can we allocate without adding/shifting tree bits? */
- i = el->l_next_free_rec - 1;
- if (!el->l_next_free_rec
- || (el->l_next_free_rec < el->l_count)
- || !el->l_recs[i].e_clusters
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
+ if (!le16_to_cpu(el->bryce_l_next_free_rec)
+ || (le16_to_cpu(el->bryce_l_next_free_rec) < le16_to_cpu(el->bryce_l_count))
+ || !le32_to_cpu(el->l_recs[i].bryce_e_clusters)
|| ocfs_extent_contig(inode, &el->l_recs[i], start_blk))
goto out_add;
@@ -818,11 +828,11 @@
/* We traveled all the way to the bottom and found nothing. */
if (shift) {
/* if we hit a leaf, we'd better be empty :) */
- OCFS_ASSERT(el->l_next_free_rec == el->l_count);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec) == le16_to_cpu(el->bryce_l_count));
OCFS_ASSERT(!bh);
LOG_TRACE_ARGS("ocfs2_allocate_extent: need to shift tree "
"depth (current = %u)\n",
- fe->id2.i_list.l_tree_depth);
+ le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth));
/* ocfs2_shift_tree_depth will return us a buffer with
* the new extent block (so we can pass that to
@@ -835,7 +845,7 @@
}
/* Special case: we have room now if we shifted from
* tree_depth 0 */
- if (fe->id2.i_list.l_tree_depth == 1)
+ if (le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth) == 1)
goto out_add;
}
@@ -889,7 +899,7 @@
*new_last_eb = NULL;
/* we have no tree, so of course, no last_eb. */
- if (!fe->id2.i_list.l_tree_depth)
+ if (!le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth))
goto bail;
/* trunc to zero special case - this makes tree_depth = 0
@@ -899,11 +909,11 @@
eb = (ocfs2_extent_block *) old_last_eb->b_data;
el = &(eb->h_list);
- OCFS_ASSERT(el->l_next_free_rec);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec));
/* Make sure that this guy will actually be empty after we
* clear away the data. */
- if (el->l_recs[0].e_cpos < new_i_clusters)
+ if (le32_to_cpu(el->l_recs[0].bryce_e_cpos) < new_i_clusters)
goto bail;
/* Ok, at this point, we know that last_eb will definitely
@@ -912,9 +922,9 @@
el = &(fe->id2.i_list);
/* go down the tree, */
do {
- for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
- if (el->l_recs[i].e_cpos < new_i_clusters) {
- block = el->l_recs[i].e_blkno;
+ for(i = (le16_to_cpu(el->bryce_l_next_free_rec) - 1); i >= 0; i--) {
+ if (le32_to_cpu(el->l_recs[i].bryce_e_cpos) < new_i_clusters) {
+ block = le64_to_cpu(el->l_recs[i].bryce_e_blkno);
break;
}
}
@@ -934,11 +944,11 @@
eb = (ocfs2_extent_block *) bh->b_data;
el = &(eb->h_list);
OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
- } while (el->l_tree_depth);
+ } while (le16_to_cpu(el->bryce_l_tree_depth));
*new_last_eb = bh;
get_bh(*new_last_eb);
- LOG_TRACE_ARGS("returning block %llu\n", le64_to_cpu(eb->h_blkno));
+ LOG_TRACE_ARGS("returning block %llu\n", le64_to_cpu(eb->bryce_h_blkno));
bail:
if (bh)
brelse(bh);
@@ -970,7 +980,7 @@
status = ocfs_find_new_last_ext_blk(osb,
inode,
fe,
- fe->i_clusters - clusters_to_del,
+ le32_to_cpu(fe->bryce_i_clusters) - clusters_to_del,
old_last_eb_bh,
&last_eb_bh);
if (status < 0) {
@@ -989,41 +999,41 @@
el = &(fe->id2.i_list);
spin_lock(&OCFS_I(inode)->ip_lock);
- OCFS_I(inode)->ip_clusters = fe->i_clusters - clusters_to_del;
+ OCFS_I(inode)->ip_clusters = le32_to_cpu(fe->bryce_i_clusters) - clusters_to_del;
spin_unlock(&OCFS_I(inode)->ip_lock);
- fe->i_clusters -= clusters_to_del;
- fe->i_mtime = OCFS_CURRENT_TIME;
+ fe->bryce_i_clusters = cpu_to_le32(le32_to_cpu(fe->bryce_i_clusters) - clusters_to_del);
+ fe->bryce_i_mtime = cpu_to_le64(OCFS_CURRENT_TIME);
- i = el->l_next_free_rec - 1;
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
- OCFS_ASSERT(el->l_recs[i].e_clusters >= clusters_to_del);
- el->l_recs[i].e_clusters -= clusters_to_del;
+ OCFS_ASSERT(le32_to_cpu(el->l_recs[i].bryce_e_clusters) >= clusters_to_del);
+ el->l_recs[i].bryce_e_clusters = cpu_to_le32(le32_to_cpu(el->l_recs[i].bryce_e_clusters) - clusters_to_del);
/* tree depth zero, we can just delete the clusters, otherwise
* we need to record the offset of the next level extent block
* as we may overwrite it. */
- if (!el->l_tree_depth)
- delete_blk = el->l_recs[i].e_blkno + el->l_recs[i].e_clusters;
+ if (!le16_to_cpu(el->bryce_l_tree_depth))
+ delete_blk = le64_to_cpu(el->l_recs[i].bryce_e_blkno) + le32_to_cpu(el->l_recs[i].bryce_e_clusters);
else
- next_eb = el->l_recs[i].e_blkno;
+ next_eb = le64_to_cpu(el->l_recs[i].bryce_e_blkno);
- if (!el->l_recs[i].e_clusters) {
+ if (!le32_to_cpu(el->l_recs[i].bryce_e_clusters)) {
/* if we deleted the whole extent record, then clear
* out the other fields and update the extent
* list. For depth > 0 trees, we've already recorded
* the extent block in 'next_eb' */
- el->l_recs[i].e_cpos = 0;
- el->l_recs[i].e_blkno = 0;
- OCFS_ASSERT(el->l_next_free_rec);
- el->l_next_free_rec--;
+ el->l_recs[i].bryce_e_cpos = cpu_to_le32(0);
+ el->l_recs[i].bryce_e_blkno = cpu_to_le64(0);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec));
+ el->bryce_l_next_free_rec = cpu_to_le16(le16_to_cpu(el->bryce_l_next_free_rec) - 1);
}
- depth = el->l_tree_depth;
- if (!fe->i_clusters) {
+ depth = le16_to_cpu(el->bryce_l_tree_depth);
+ if (!le32_to_cpu(fe->bryce_i_clusters)) {
/* trunc to zero is a special case. */
- el->l_tree_depth = 0;
- fe->i_last_eb_blk = 0;
+ el->bryce_l_tree_depth = cpu_to_le16(0);
+ fe->bryce_i_last_eb_blk = cpu_to_le64(0);
} else if (last_eb)
- fe->i_last_eb_blk = last_eb->h_blkno;
+ fe->bryce_i_last_eb_blk = last_eb->bryce_h_blkno;
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
@@ -1041,7 +1051,7 @@
LOG_ERROR_STATUS(status);
goto bail;
}
- last_eb->h_next_leaf_blk = 0;
+ last_eb->bryce_h_next_leaf_blk = cpu_to_le64(0);
status = ocfs_journal_dirty(handle, last_eb_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1070,36 +1080,42 @@
goto bail;
}
- OCFS_ASSERT(el->l_next_free_rec);
- OCFS_ASSERT(depth == (el->l_tree_depth + 1));
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec));
+ OCFS_ASSERT(depth == (le16_to_cpu(el->bryce_l_tree_depth) + 1));
- i = el->l_next_free_rec - 1;
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
LOG_TRACE_ARGS("extent block %llu, before: record %d: "
"(%u, %u, %llu), next = %u\n",
- le64_to_cpu(eb->h_blkno), i,
- el->l_recs[i].e_cpos, el->l_recs[i].e_clusters,
- el->l_recs[i].e_blkno, el->l_next_free_rec);
+ le64_to_cpu(eb->bryce_h_blkno),
+ i,
+ le32_to_cpu(el->l_recs[i].bryce_e_cpos),
+ le32_to_cpu(el->l_recs[i].bryce_e_clusters),
+ le64_to_cpu(el->l_recs[i].bryce_e_blkno),
+ le16_to_cpu(el->bryce_l_next_free_rec));
- OCFS_ASSERT(el->l_recs[i].e_clusters >= clusters_to_del);
- el->l_recs[i].e_clusters -= clusters_to_del;
+ OCFS_ASSERT(le32_to_cpu(el->l_recs[i].bryce_e_clusters) >= clusters_to_del);
+ el->l_recs[i].bryce_e_clusters = cpu_to_le32(le32_to_cpu(el->l_recs[i].bryce_e_clusters) - clusters_to_del);
- next_eb = el->l_recs[i].e_blkno;
+ next_eb = le64_to_cpu(el->l_recs[i].bryce_e_blkno);
/* bottom-most block requires us to delete data.*/
- if (!el->l_tree_depth)
- delete_blk = el->l_recs[i].e_blkno +
- el->l_recs[i].e_clusters;
- if (!el->l_recs[i].e_clusters) {
- el->l_recs[i].e_cpos = 0;
- el->l_recs[i].e_blkno = 0;
- OCFS_ASSERT(el->l_next_free_rec);
- el->l_next_free_rec--;
+ if (!le16_to_cpu(el->bryce_l_tree_depth))
+ delete_blk = le64_to_cpu(el->l_recs[i].bryce_e_blkno) +
+ le32_to_cpu(el->l_recs[i].bryce_e_clusters);
+ if (!le32_to_cpu(el->l_recs[i].bryce_e_clusters)) {
+ el->l_recs[i].bryce_e_cpos = cpu_to_le32(0);
+ el->l_recs[i].bryce_e_blkno = cpu_to_le64(0);
+ OCFS_ASSERT(le16_to_cpu(el->bryce_l_next_free_rec));
+ el->bryce_l_next_free_rec = cpu_to_le16(le16_to_cpu(el->bryce_l_next_free_rec) - 1);
}
LOG_TRACE_ARGS("extent block %llu, after: record %d: "
"(%u, %u, %llu), next = %u\n",
- le64_to_cpu(eb->h_blkno), i,
- el->l_recs[i].e_cpos, el->l_recs[i].e_clusters,
- el->l_recs[i].e_blkno, el->l_next_free_rec);
+ le64_to_cpu(eb->bryce_h_blkno),
+ i,
+ le32_to_cpu(el->l_recs[i].bryce_e_cpos),
+ le32_to_cpu(el->l_recs[i].bryce_e_clusters),
+ le64_to_cpu(el->l_recs[i].bryce_e_blkno),
+ le16_to_cpu(el->bryce_l_next_free_rec));
status = ocfs_journal_dirty(handle, eb_bh);
if (status < 0) {
@@ -1107,12 +1123,12 @@
goto bail;
}
- if (!el->l_next_free_rec) {
+ if (!le16_to_cpu(el->bryce_l_next_free_rec)) {
LOG_TRACE_ARGS("deleting this extent block.\n");
- OCFS_ASSERT(!eb->h_suballoc_node);
- OCFS_ASSERT(!el->l_recs[0].e_clusters);
- OCFS_ASSERT(!el->l_recs[0].e_cpos);
- OCFS_ASSERT(!el->l_recs[0].e_blkno);
+ OCFS_ASSERT(!le16_to_cpu(eb->bryce_h_suballoc_node));
+ OCFS_ASSERT(!le32_to_cpu(el->l_recs[0].bryce_e_clusters));
+ OCFS_ASSERT(!le32_to_cpu(el->l_recs[0].bryce_e_cpos));
+ OCFS_ASSERT(!le64_to_cpu(el->l_recs[0].bryce_e_blkno));
status = ocfs2_free_extent_block(handle,
tc->tc_ext_alloc_inode,
tc->tc_ext_alloc_bh,
@@ -1138,11 +1154,11 @@
status = 0;
bail:
if (!status)
- ocfs2_extent_map_trunc(inode, fe->i_clusters);
+ ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->bryce_i_clusters));
else
ocfs2_extent_map_drop(inode, 0);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -1172,35 +1188,31 @@
down_write(&OCFS_I(inode)->ip_alloc_sem);
- target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
- i_size_read(inode));
-
- /* the extent map gets truncated in ocfs_do_truncate */
- ocfs2_meta_lvb_set_trunc_clusters(inode, target_i_clusters);
-
last_eb_bh = tc->tc_last_eb_bh;
tc->tc_last_eb_bh = NULL;
handle = tc->tc_handle;
+ target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
+ inode->i_size);
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (fe->id2.i_list.l_tree_depth) {
+ if (le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth)) {
eb = (ocfs2_extent_block *) last_eb_bh->b_data;
el = &(eb->h_list);
} else
el = &(fe->id2.i_list);
- last_eb = fe->i_last_eb_blk;
+ last_eb = le64_to_cpu(fe->bryce_i_last_eb_blk);
start:
LOG_TRACE_ARGS("ocfs_commit_truncate: fe->i_clusters = %u, "
"last_eb = %llu, fe->i_last_eb_blk = %llu, "
"fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
- fe->i_clusters, last_eb, fe->i_last_eb_blk,
- fe->id2.i_list.l_tree_depth, last_eb_bh);
+ le32_to_cpu(fe->bryce_i_clusters), last_eb, le64_to_cpu(fe->bryce_i_last_eb_blk),
+ le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth), last_eb_bh);
- if (last_eb != fe->i_last_eb_blk) {
+ if (last_eb != le64_to_cpu(fe->bryce_i_last_eb_blk)) {
LOG_TRACE_ARGS("last_eb changed!\n");
- OCFS_ASSERT(fe->id2.i_list.l_tree_depth);
- last_eb = fe->i_last_eb_blk;
+ OCFS_ASSERT(le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth));
+ last_eb = le64_to_cpu(fe->bryce_i_last_eb_blk);
/* i_last_eb_blk may have changed, read it if
* necessary. We don't have to worry about the
* truncate to zero case here (where there becomes no
@@ -1225,12 +1237,13 @@
/* by now, el will point to the extent list on the bottom most
* portion of this tree. */
- i = el->l_next_free_rec - 1;
- if (el->l_recs[i].e_cpos >= target_i_clusters)
- clusters_to_del = el->l_recs[i].e_clusters;
+ i = le16_to_cpu(el->bryce_l_next_free_rec) - 1;
+ if (le32_to_cpu(el->l_recs[i].bryce_e_cpos) >= target_i_clusters)
+ clusters_to_del = le32_to_cpu(el->l_recs[i].bryce_e_clusters);
else
- clusters_to_del = (el->l_recs[i].e_clusters
- + el->l_recs[i].e_cpos) - target_i_clusters;
+ clusters_to_del = (le32_to_cpu(el->l_recs[i].bryce_e_clusters)
+ + le32_to_cpu(el->l_recs[i].bryce_e_cpos))
+ - target_i_clusters;
LOG_TRACE_ARGS("clusters_to_del = %u in this pass\n", clusters_to_del);
credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del,
@@ -1242,14 +1255,6 @@
LOG_ERROR_STATUS(status);
goto bail;
}
- /* Since we got our cluster lock from caller and we
- * don't add it to the handle: */
- ocfs_set_inode_lock_trans(osb->journal, inode);
-
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
- if (status < 0)
- LOG_ERROR_STATUS(status);
} else {
status = ocfs_extend_trans(handle, credits);
if (status < 0) {
@@ -1265,8 +1270,8 @@
goto bail;
}
- OCFS_ASSERT(fe->i_clusters >= target_i_clusters);
- if (fe->i_clusters > target_i_clusters)
+ OCFS_ASSERT(le32_to_cpu(fe->bryce_i_clusters) >= target_i_clusters);
+ if (le32_to_cpu(fe->bryce_i_clusters) > target_i_clusters)
goto start;
bail:
up_write(&OCFS_I(inode)->ip_alloc_sem);
@@ -1308,13 +1313,13 @@
*tc = NULL;
new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
- i_size_read(inode));
+ inode->i_size);
fe = (ocfs2_dinode *) fe_bh->b_data;
LOG_TRACE_ARGS("fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
- "%llu\n", fe->i_clusters, new_i_clusters, fe->i_size);
+ "%llu\n", le32_to_cpu(fe->bryce_i_clusters), new_i_clusters, le64_to_cpu(fe->bryce_i_size));
- OCFS_ASSERT(fe->i_clusters > new_i_clusters);
+ OCFS_ASSERT(le32_to_cpu(fe->bryce_i_clusters) > new_i_clusters);
*tc = kmalloc(sizeof(ocfs2_truncate_context), GFP_KERNEL);
if (!(*tc)) {
@@ -1332,11 +1337,11 @@
}
metadata_delete = 0;
- if (fe->id2.i_list.l_tree_depth) {
+ if (le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth)) {
/* If we have a tree, then the truncate may result in
* metadata deletes. Figure this out from the
* rightmost leaf block.*/
- status = ocfs_read_block(osb, fe->i_last_eb_blk,
+ status = ocfs_read_block(osb, le64_to_cpu(fe->bryce_i_last_eb_blk),
&last_eb_bh, OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1345,7 +1350,7 @@
eb = (ocfs2_extent_block *) last_eb_bh->b_data;
OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
el = &(eb->h_list);
- if (el->l_recs[0].e_cpos >= new_i_clusters)
+ if (le32_to_cpu(el->l_recs[0].bryce_e_cpos) >= new_i_clusters)
metadata_delete = 1;
}
@@ -1360,15 +1365,15 @@
}
ocfs_handle_add_inode(handle, ext_alloc_inode);
- status = ocfs2_meta_lock(ext_alloc_inode,
- handle,
- &ext_alloc_bh,
- 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &ext_alloc_bh, ext_alloc_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, ext_alloc_inode);
}
data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
@@ -1379,12 +1384,15 @@
}
ocfs_handle_add_inode(handle, data_alloc_inode);
- status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &data_alloc_bh, data_alloc_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, data_alloc_inode);
(*tc)->tc_bitmap_inode = data_alloc_inode;
(*tc)->tc_bitmap_bh = data_alloc_bh;
Modified: branches/endian/src/alloc.h
===================================================================
--- branches/endian/src/alloc.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/alloc.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -48,7 +48,7 @@
* new tree_depth==0 extent_block, and one block at the new
* top-of-the tree.
*/
- return fe->id2.i_list.l_tree_depth + 2;
+ return(le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth) + 2);
}
typedef struct _ocfs2_truncate_context {
Modified: branches/endian/src/aops.c
===================================================================
--- branches/endian/src/aops.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/aops.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -35,8 +35,6 @@
#include "extent_map.h"
#include "file.h"
#include "inode.h"
-#include "symlink.h"
-
#include "ocfs_journal.h"
@@ -56,8 +54,6 @@
LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, %d)\n", inode,
(unsigned long long)iblock, bh_result, create);
- OCFS_ASSERT(!ocfs2_inode_is_fast_symlink(inode));
-
if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
LOG_ERROR_ARGS ("block offset > PATH_MAX: %llu",
(unsigned long long)iblock);
@@ -79,7 +75,7 @@
}
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
- fe->i_clusters)) {
+ le32_to_cpu(fe->bryce_i_clusters))) {
LOG_ERROR_ARGS ("block offset is outside the allocated size: %llu",
(unsigned long long)iblock);
goto bail;
@@ -87,9 +83,9 @@
/* We don't use the page cache to create symlink data, so if
* need be, copy it over from the buffer cache. */
- if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(inode)) {
+ if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(osb, inode)) {
buffer_cache_bh = sb_getblk(osb->sb,
- fe->id2.i_list.l_recs[0].e_blkno + iblock);
+ le64_to_cpu(fe->id2.i_list.l_recs[0].bryce_e_blkno) + iblock);
if (!buffer_cache_bh) {
LOG_ERROR_STR("couldn't getblock for symlink!");
goto bail;
@@ -100,7 +96,7 @@
* the bh, even if it commits while we're doing the
* copy, the data is still good. */
if (buffer_jbd(buffer_cache_bh)
- && ocfs_inode_is_new(inode)) {
+ && ocfs_inode_is_new(osb, inode)) {
kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
if (!kaddr) {
LOG_ERROR_ARGS("couldn't kmap!\n");
@@ -116,7 +112,7 @@
}
map_bh(bh_result, inode->i_sb,
- fe->id2.i_list.l_recs[0].e_blkno + iblock);
+ le64_to_cpu(fe->id2.i_list.l_recs[0].bryce_e_blkno) + iblock);
err = 0;
@@ -129,11 +125,12 @@
}
static int ocfs_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+ struct buffer_head *bh_result, int create)
{
int err = -EIO;
u64 vbo = 0;
u64 p_blkno;
+ int open_direct;
LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, %d)\n", inode,
(unsigned long long)iblock, bh_result, create);
@@ -143,6 +140,8 @@
inode, inode->i_ino);
}
+ open_direct = OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
+
if (S_ISLNK(inode->i_mode)) {
/* this always does I/O for some reason. */
err = ocfs_symlink_get_block (inode, iblock, bh_result,
@@ -163,8 +162,13 @@
}
spin_unlock(&OCFS_I(inode)->ip_lock);
+ if (!open_direct)
+ down_read(&OCFS_I(inode)->ip_node_extend_sem);
+
err = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
NULL);
+ if (!open_direct)
+ up_read(&OCFS_I(inode)->ip_node_extend_sem);
if (err) {
LOG_ERROR_ARGS("Error %d from get_blocks(0x%p, %llu, 1, %llu, NULL)\n",
@@ -192,7 +196,7 @@
if (vbo != OCFS_I(inode)->ip_mmu_private) {
LOG_ERROR_ARGS("Uh-oh, vbo = %lld, i_size = %llu, mmu = %llu, "
"inode = %llu\n",
- vbo, i_size_read(inode),
+ vbo, inode->i_size,
OCFS_I(inode)->ip_mmu_private,
OCFS_I(inode)->ip_blkno);
BUG();
@@ -496,57 +500,85 @@
* called like this: dio->get_blocks(dio->inode, fs_startblk,
* fs_count, map_bh, dio->rw == WRITE);
*/
+/*
+ * Map up to max_blocks blocks starting at iblock for direct I/O.
+ *
+ * Re-verifies the inode when ip_needs_verification is set, extends the
+ * file when create is set and the request ends past the bytes covered by
+ * ip_clusters, then looks the block up in the extent map and records the
+ * physical block and the contiguous length (capped at max_blocks) in
+ * bh_result.
+ *
+ * Returns 0 on success, -EIO for NULL arguments, a failed verification or
+ * a failed extent lookup, and -ENOSPC when the file cannot be extended.
+ */
static int ocfs_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
unsigned long max_blocks,
struct buffer_head *bh_result, int create)
{
- int ret;
+ int ret = -EIO; /* what bail returns unless a path overrides it */
+ int status;
+ ocfs_super *osb = NULL;
u64 vbo_max; /* file offset, max_blocks from iblock */
u64 p_blkno;
int contig_blocks;
+ int set_new = 0; /* flag */
unsigned char blocksize_bits;
if (!inode || !bh_result) {
- LOG_ERROR_STR("inode or bh_result is null");
+ LOG_ERROR_STR("ocfs_direct_IO_get_blocks: inode or bh_result is null");
return -EIO;
}
+ osb = OCFS_SB(inode->i_sb);
blocksize_bits = inode->i_sb->s_blocksize_bits;
+ /* make sure we're up to date... */
+ if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
+ LOG_TRACE_STR ("ocfs_direct_IO_get_blocks: verify oin.");
+ status = ocfs_verify_update_inode (osb, inode);
+ if (status < 0) {
+ LOG_TRACE_STR ("ocfs_verify_update_inode failed");
+ ret = -EIO;
+ goto bail;
+ }
+ }
/* This function won't even be called if the request isn't all
* nicely aligned and of the right size, so there's no need
* for us to check any of that. */
+ /* Widen iblock before adding so the sum is computed in 64 bits. */
vbo_max = ((u64)iblock + max_blocks) << blocksize_bits;
- spin_lock(&OCFS_I(inode)->ip_lock);
- if ((iblock + max_blocks) >
- ocfs2_clusters_to_blocks(inode->i_sb,
- OCFS_I(inode)->ip_clusters)) {
- spin_unlock(&OCFS_I(inode)->ip_lock);
- ret = -EIO;
- goto bail;
+ /* NOTE: create flag is set when we ?may? have to allocate some
+ blocks for the file. */
+ if (create &&
+ (vbo_max > ocfs2_clusters_to_bytes(inode->i_sb,
+ OCFS_I(inode)->ip_clusters))) {
+ /* WARNING: How much do we really want to extend the file? */
+ status = ocfs_extend_file(osb, inode, vbo_max);
+ if (status < 0) {
+ /* Report the failure through ret; bail returns ret, not status. */
+ ret = -ENOSPC;
+ LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
+ goto bail;
+ }
+ set_new = 1;
}
- spin_unlock(&OCFS_I(inode)->ip_lock);
/* This figure out the size of the next contiguous block, and
* our logical offset */
- ret = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
+ status = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
&contig_blocks);
- if (ret) {
+ if (status) {
LOG_ERROR_ARGS("get_blocks() failed iblock=%llu\n",
(unsigned long long)iblock);
ret = -EIO;
goto bail;
}
- map_bh(bh_result, inode->i_sb, p_blkno);
+ /* Do whatever we need to the buffer_head */
+ if (set_new) {
+ set_buffer_new(bh_result);
+ /* Do we really want to set bh_result->b_blocknr here too? */
+ bh_result->b_blocknr = p_blkno;
+ } else {
+ clear_buffer_new(bh_result);
+ /* is the last argument here correct? */
+ map_bh(bh_result, inode->i_sb, p_blkno);
+ }
/* make sure we don't map more than max_blocks blocks here as
that's all the kernel will handle at this point. */
if (max_blocks < contig_blocks)
contig_blocks = max_blocks;
bh_result->b_size = contig_blocks << blocksize_bits;
+
+ ret = 0;
bail:
return ret;
}
@@ -606,7 +638,6 @@
.prepare_write = ocfs_prepare_write,
.commit_write = ocfs_commit_write,
.bmap = ocfs_bmap,
- .sync_page = block_sync_page,
/*
* On a 2.4 system, we are only adding this here as a dummy basically.
Modified: branches/endian/src/buffer_head_io.c
===================================================================
--- branches/endian/src/buffer_head_io.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/buffer_head_io.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -58,6 +58,7 @@
unlock_buffer(bh);
// LOG_EXIT();
+ return;
}
int ocfs_write_blocks (ocfs_super *osb, struct buffer_head *bhs[],
Modified: branches/endian/src/buffer_head_io.h
===================================================================
--- branches/endian/src/buffer_head_io.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/buffer_head_io.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -143,8 +143,8 @@
int i;
for(i = 0; i < 512; i++)
if ((bh)->b_data[i] != 'P')
- return 0;
- return 1;
+ return(0);
+ return(1);
}
#define OCFS_DO_HEX_DUMP(bh) \
Modified: branches/endian/src/dcache.c
===================================================================
--- branches/endian/src/dcache.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/dcache.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -38,6 +38,7 @@
#include "alloc.h"
#include "dcache.h"
#include "file.h"
+#include "vote.h"
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DCACHE
@@ -58,6 +59,12 @@
osb = OCFS_SB(inode->i_sb);
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL REVALIDATE: flags=%d\n", flags);
+ }
+#endif
+
OCFS_ASSERT(osb);
if (inode == osb->root_inode) {
@@ -65,16 +72,13 @@
goto bail;
}
- spin_lock(&OCFS_I(inode)->ip_lock);
/* did we or someone else delete this inode? */
if (INODE_DELETED(inode)) {
- spin_unlock(&OCFS_I(inode)->ip_lock);
LOG_TRACE_ARGS("dentry_revalidate: inode (%llu) deleted, "
"returning false\n",
OCFS_I(inode)->ip_blkno);
goto bail;
}
- spin_unlock(&OCFS_I(inode)->ip_lock);
#warning "should we do this for all files?"
if (S_ISDIR(inode->i_mode) && (!inode->i_nlink)) {
@@ -84,8 +88,23 @@
goto bail;
}
+ if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
+ LOG_TRACE_STR ("Only node alive. revalidate=true.");
+ ret = 1;
+ goto bail;
+ }
+
+ /* if I hold cache lock, no revalidate needed */
+ if (ocfs_is_local_cache_lock(osb, inode)) {
+ ret = 1;
+ goto bail;
+ }
+
ret = 1;
+ /* TODO: Is this really necessary? */
+ atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
+
bail:
LOG_EXIT_INT (ret);
Modified: branches/endian/src/dir.c
===================================================================
--- branches/endian/src/dir.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/dir.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -49,7 +49,7 @@
#include "alloc.h"
#include "dir.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
#include "file.h"
#include "inode.h"
@@ -85,6 +85,7 @@
struct inode *inode = filp->f_dentry->d_inode;
struct super_block * sb = inode->i_sb;
int have_disk_lock = 0;
+ ocfs_super *osb = OCFS_SB(sb);
LOG_SET_CONTEXT(READDIR);
@@ -93,7 +94,7 @@
stored = 0;
bh = NULL;
- error = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ error = ocfs_acquire_lock_ro(osb, inode);
if (error < 0) {
if (error != -EINTR)
LOG_ERROR_STATUS (error);
@@ -105,7 +106,7 @@
offset = filp->f_pos & (sb->s_blocksize - 1);
- while (!error && !stored && filp->f_pos < i_size_read(inode)) {
+ while (!error && !stored && filp->f_pos < inode->i_size) {
blk = (filp->f_pos) >> sb->s_blocksize_bits;
bh = ocfs_bread (inode, blk, &err, 0);
if (!bh) {
@@ -142,10 +143,10 @@
* least that it is non-zero. A
* failure will be detected in the
* dirent test below. */
- if (le16_to_cpu(de->rec_len) <
+ if (le16_to_cpu(de->bryce_rec_len) <
OCFS2_DIR_REC_LEN(1))
break;
- i += le16_to_cpu(de->rec_len);
+ i += le16_to_cpu(de->bryce_rec_len);
}
offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -153,7 +154,7 @@
filp->f_version = inode->i_version;
}
- while (!error && filp->f_pos < i_size_read(inode)
+ while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
if (!ocfs_check_dir_entry (inode, de, bh, offset)) {
@@ -164,8 +165,8 @@
brelse (bh);
goto bail;
}
- offset += le16_to_cpu(de->rec_len);
- if (le64_to_cpu(de->inode)) {
+ offset += le16_to_cpu(de->bryce_rec_len);
+ if (le64_to_cpu(de->bryce_inode)) {
/* We might block in the next section
* if the data destination is
* currently swapped out. So, use a
@@ -181,7 +182,7 @@
error = filldir(dirent, de->name,
de->name_len,
filp->f_pos,
- ino_from_blkno(sb, le64_to_cpu(de->inode)),
+ ino_from_blkno(sb, le64_to_cpu(de->bryce_inode)),
d_type);
if (error)
break;
@@ -189,7 +190,7 @@
goto revalidate;
stored ++;
}
- filp->f_pos += le16_to_cpu(de->rec_len);
+ filp->f_pos += le16_to_cpu(de->bryce_rec_len);
}
offset = 0;
brelse (bh);
@@ -200,8 +201,11 @@
stored = 0;
bail:
- if (have_disk_lock)
- ocfs2_meta_unlock(inode, 0);
+ if (have_disk_lock) {
+ error = ocfs_release_lock_ro (osb, inode);
+ if (error < 0)
+ LOG_ERROR_STATUS (error);
+ }
LOG_EXIT_STATUS(stored);
LOG_CLEAR_CONTEXT();
@@ -220,6 +224,7 @@
struct ocfs2_dir_entry **dirent)
{
int status = -ENOENT;
+ int tmpstat;
int lock_acq = 0;
LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', blkno=%p, inode=%p)\n",
@@ -227,7 +232,7 @@
if (take_lock) {
/* Get a lock on the directory... */
- status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ status = ocfs_acquire_lock_ro (osb, inode);
if (status < 0) {
/* Volume should be disabled in this case */
if (status != -EINTR)
@@ -243,13 +248,19 @@
goto leave;
}
- *blkno = le64_to_cpu((*dirent)->inode);
+ *blkno = le64_to_cpu((*dirent)->bryce_inode);
status = 0;
leave:
if (take_lock && lock_acq)
- ocfs2_meta_unlock(inode, 0);
+ {
+ tmpstat = ocfs_release_lock_ro (osb, inode);
+ if (tmpstat < 0) {
+ LOG_ERROR_STATUS (tmpstat);
+ /* Volume should be disabled in this case */
+ }
+ }
if (status < 0) {
*dirent = NULL;
@@ -276,7 +287,7 @@
int err;
sb = inode->i_sb;
- if ((i_size_read(inode) <
+ if ((inode->i_size <
(OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
!(bh = ocfs_bread (inode, 0, &err, 0))) {
LOG_ERROR_ARGS ("bad directory (dir #%llu) - no data block\n",
@@ -286,9 +297,9 @@
de = (struct ocfs2_dir_entry *) bh->b_data;
de1 = (struct ocfs2_dir_entry *)
- ((char *) de + le16_to_cpu(de->rec_len));
- if ((le64_to_cpu(de->inode) != OCFS_I(inode)->ip_blkno) ||
- !le64_to_cpu(de1->inode) ||
+ ((char *) de + le16_to_cpu(de->bryce_rec_len));
+ if ((le64_to_cpu(de->bryce_inode) != OCFS_I(inode)->ip_blkno) ||
+ !le64_to_cpu(de1->bryce_inode) ||
strcmp (".", de->name) ||
strcmp ("..", de1->name)) {
LOG_ERROR_ARGS ("bad directory (dir #%llu) - no `.' or `..'\n",
@@ -296,9 +307,9 @@
brelse (bh);
return 1;
}
- offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
- de = (struct ocfs2_dir_entry *) ((char *) de1 + le16_to_cpu(de1->rec_len));
- while (offset < i_size_read(inode) ) {
+ offset = le16_to_cpu(de->bryce_rec_len) + le16_to_cpu(de1->bryce_rec_len);
+ de = (struct ocfs2_dir_entry *) ((char *) de1 + le16_to_cpu(de1->bryce_rec_len));
+ while (offset < inode->i_size ) {
if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
brelse (bh);
bh = ocfs_bread(inode,
@@ -315,13 +326,13 @@
brelse (bh);
return 1;
}
- if (le64_to_cpu(de->inode)) {
+ if (le64_to_cpu(de->bryce_inode)) {
brelse (bh);
return 0;
}
- offset += le16_to_cpu(de->rec_len);
+ offset += le16_to_cpu(de->bryce_rec_len);
de = (struct ocfs2_dir_entry *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ ((char *) de + le16_to_cpu(de->bryce_rec_len));
}
brelse (bh);
return 1;
@@ -341,7 +352,7 @@
u64 p_blkno;
spin_lock(&OCFS_I(dir)->ip_lock);
- extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS_I(dir)->ip_clusters));
+ extend = (dir->i_size == ocfs2_clusters_to_bytes(sb, OCFS_I(dir)->ip_clusters));
spin_unlock(&OCFS_I(dir)->ip_lock);
if (extend) {
@@ -371,7 +382,7 @@
status = 0;
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/* assumes you already have a cluster lock on the directory. */
@@ -381,9 +392,8 @@
struct buffer_head **new_de_bh)
{
int status = 0;
+ ocfs2_dinode *fe = (ocfs2_dinode *) parent_fe_bh->b_data;
int credits, num_free_extents;
- loff_t dir_i_size;
- ocfs2_dinode *fe = (ocfs2_dinode *) parent_fe_bh->b_data;
ocfs2_alloc_context *data_ac = NULL;
ocfs2_alloc_context *meta_ac = NULL;
ocfs_journal_handle *handle = NULL;
@@ -393,9 +403,8 @@
LOG_ENTRY();
- dir_i_size = i_size_read(dir);
LOG_TRACE_ARGS("extending dir %llu (i_size = %llu)\n",
- OCFS_I(dir)->ip_blkno, dir_i_size);
+ OCFS_I(dir)->ip_blkno, dir->i_size);
handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
@@ -405,7 +414,7 @@
/* dir->i_size is always block aligned. */
spin_lock(&OCFS_I(dir)->ip_lock);
- if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS_I(dir)->ip_clusters)) {
+ if (dir->i_size == ocfs2_clusters_to_bytes(sb, OCFS_I(dir)->ip_clusters)) {
spin_unlock(&OCFS_I(dir)->ip_lock);
num_free_extents = ocfs_num_free_extents(osb, dir, fe);
if (num_free_extents < 0) {
@@ -458,16 +467,15 @@
}
memset(new_bh->b_data, 0, sb->s_blocksize);
de = (struct ocfs2_dir_entry *) new_bh->b_data;
- de->inode = 0;
- de->rec_len = le16_to_cpu(sb->s_blocksize);
+ de->bryce_inode = le64_to_cpu(0); /* 0 shouldn't need conversion but just to be on the safe side */
+ de->bryce_rec_len = le16_to_cpu(sb->s_blocksize);
status = ocfs_journal_dirty(handle, new_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- dir_i_size += dir->i_sb->s_blocksize;
- i_size_write(dir, dir_i_size);
+ dir->i_size += dir->i_sb->s_blocksize;
dir->i_blocks += 1;
status = ocfs_mark_inode_dirty(handle, dir, parent_fe_bh);
if (status < 0) {
@@ -519,7 +527,7 @@
OCFS_ASSERT(S_ISDIR(dir->i_mode));
fe = (ocfs2_dinode *) parent_fe_bh->b_data;
- OCFS_ASSERT(fe->i_size == i_size_read(dir));
+ OCFS_ASSERT(le64_to_cpu(fe->bryce_i_size) == dir->i_size);
sb = dir->i_sb;
@@ -543,7 +551,7 @@
brelse (bh);
bh = NULL;
- if (i_size_read(dir) <= offset) {
+ if (dir->i_size <= offset) {
status = ocfs_extend_dir(osb,
dir,
parent_fe_bh,
@@ -576,9 +584,9 @@
status = -EEXIST;
goto bail;
}
- if (((le64_to_cpu(de->inode) == 0) &&
- (le16_to_cpu(de->rec_len) >= rec_len)) ||
- (le16_to_cpu(de->rec_len) >=
+ if (((le64_to_cpu(de->bryce_inode) == 0) &&
+ (le16_to_cpu(de->bryce_rec_len) >= rec_len)) ||
+ (le16_to_cpu(de->bryce_rec_len) >=
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in. */
@@ -587,8 +595,8 @@
status = 0;
goto bail;
}
- offset += le16_to_cpu(de->rec_len);
- de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
+ offset += le16_to_cpu(de->bryce_rec_len);
+ de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->bryce_rec_len));
}
status = 0;
@@ -596,5 +604,5 @@
if (bh)
brelse(bh);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
Modified: branches/endian/src/extent_map.c
===================================================================
--- branches/endian/src/extent_map.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/extent_map.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -120,11 +120,11 @@
parent = *p;
ent = rb_entry(parent, struct ocfs2_extent_map_entry,
e_node);
- if ((cpos + clusters) <= ent->e_rec.e_cpos) {
+ if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.bryce_e_cpos)) {
p = &(*p)->rb_left;
ent = NULL;
- } else if (cpos >= (ent->e_rec.e_cpos +
- ent->e_rec.e_clusters)) {
+ } else if (cpos >= (le32_to_cpu(ent->e_rec.bryce_e_cpos) +
+ le32_to_cpu(ent->e_rec.bryce_e_clusters))) {
p = &(*p)->rb_right;
ent = NULL;
} else
@@ -162,29 +162,29 @@
* we hold alloc_sem. So we can do this without other
* locks.
*/
- while (el->l_tree_depth)
+ while (le16_to_cpu(el->bryce_l_tree_depth))
{
blkno = 0;
- for (i = 0; i < el->l_next_free_rec; i++) {
+ for (i = 0; i < le16_to_cpu(el->bryce_l_next_free_rec); i++) {
rec = &el->l_recs[i];
ret = -EBADR;
- if ((rec->e_cpos + rec->e_clusters) >
+ if ((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) >
OCFS_I(inode)->ip_clusters)
goto out_free;
- if ((rec->e_cpos + rec->e_clusters) <= cpos) {
+ if ((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) <= cpos) {
ret = ocfs2_extent_map_insert(inode,
rec,
- el->l_tree_depth);
+ le16_to_cpu(el->bryce_l_tree_depth));
if (ret && (ret != -EEXIST))
goto out_free;
continue;
}
- if ((cpos + clusters) <= rec->e_cpos) {
+ if ((cpos + clusters) <= le32_to_cpu(rec->bryce_e_cpos)) {
ret = ocfs2_extent_map_insert(inode,
rec,
- el->l_tree_depth);
+ le16_to_cpu(el->bryce_l_tree_depth));
if (ret && (ret != -EEXIST))
goto out_free;
continue;
@@ -198,9 +198,9 @@
/* Check to see if we're stradling */
ret = -ESRCH;
- if ((rec->e_cpos > cpos) ||
+ if ((le32_to_cpu(rec->bryce_e_cpos) > cpos) ||
((cpos + clusters) >
- (rec->e_cpos + rec->e_clusters)))
+ (le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters))))
goto out_free;
/*
@@ -212,7 +212,7 @@
if (blkno)
goto out_free;
- blkno = rec->e_blkno;
+ blkno = le64_to_cpu(rec->bryce_e_blkno);
}
/*
@@ -237,13 +237,13 @@
el = &eb->h_list;
}
- if (el->l_tree_depth)
+ if (le16_to_cpu(el->bryce_l_tree_depth))
BUG();
- for (i = 0; i < el->l_next_free_rec; i++) {
+ for (i = 0; i < le16_to_cpu(el->bryce_l_next_free_rec); i++) {
rec = &el->l_recs[i];
ret = ocfs2_extent_map_insert(inode, rec,
- el->l_tree_depth);
+ le16_to_cpu(el->bryce_l_tree_depth));
if (ret)
goto out_free;
}
@@ -283,7 +283,7 @@
*ret_ent = ent;
return 0;
}
- blkno = ent->e_rec.e_blkno;
+ blkno = le64_to_cpu(ent->e_rec.bryce_e_blkno);
spin_unlock(&OCFS_I(inode)->ip_lock);
ret = ocfs_read_block(OCFS_SB(inode->i_sb), blkno, &bh,
@@ -339,8 +339,8 @@
struct rb_node **p, *parent;
struct ocfs2_extent_map_entry *old_ent;
- old_ent = ocfs2_extent_map_lookup(em, ent->e_rec.e_cpos,
- ent->e_rec.e_clusters,
+ old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.bryce_e_cpos),
+ le32_to_cpu(ent->e_rec.bryce_e_clusters),
&p, &parent);
if (old_ent)
return -EEXIST;
@@ -376,8 +376,9 @@
goto out_unlock;
}
- old_ent = ocfs2_extent_map_lookup(em, rec->e_cpos,
- rec->e_clusters, NULL, NULL);
+ old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->bryce_e_cpos),
+ le32_to_cpu(rec->bryce_e_clusters),
+ NULL, NULL);
if (!old_ent)
BUG();
@@ -405,28 +406,28 @@
* the first go round. In the end, the pieces we need will
* be used, and the pieces we don't will be freed.
*/
- ctxt->need_left = !!(rec->e_cpos > old_ent->e_rec.e_cpos);
- ctxt->need_right = !!((old_ent->e_rec.e_cpos +
- old_ent->e_rec.e_clusters) >
- (rec->e_cpos + rec->e_clusters));
+ ctxt->need_left = !!(le32_to_cpu(rec->bryce_e_cpos) > le32_to_cpu(old_ent->e_rec.bryce_e_cpos));
+ ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.bryce_e_cpos) +
+ le32_to_cpu(old_ent->e_rec.bryce_e_clusters)) >
+ (le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)));
ret = -EAGAIN;
if (ctxt->need_left) {
if (!ctxt->left_ent)
goto out_unlock;
*(ctxt->left_ent) = *old_ent;
- ctxt->left_ent->e_rec.e_clusters =
- rec->e_cpos - ctxt->left_ent->e_rec.e_cpos;
+ cpu_to_le32(ctxt->left_ent->e_rec.bryce_e_clusters) =
+ le32_to_cpu(rec->bryce_e_cpos) - le32_to_cpu(ctxt->left_ent->e_rec.bryce_e_cpos);
}
if (ctxt->need_right) {
if (!ctxt->right_ent)
goto out_unlock;
*(ctxt->right_ent) = *old_ent;
- ctxt->right_ent->e_rec.e_cpos =
- rec->e_cpos + rec->e_clusters;
- ctxt->right_ent->e_rec.e_clusters =
- (old_ent->e_rec.e_cpos +
- old_ent->e_rec.e_clusters) -
- ctxt->right_ent->e_rec.e_cpos;
+ cpu_to_le32(ctxt->right_ent->e_rec.bryce_e_cpos) =
+ le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters);
+ cpu_to_le32(ctxt->right_ent->e_rec.bryce_e_clusters) =
+ (le32_to_cpu(old_ent->e_rec.bryce_e_cpos) +
+ le32_to_cpu(old_ent->e_rec.bryce_e_clusters)) -
+ le32_to_cpu(ctxt->right_ent->e_rec.bryce_e_cpos);
}
rb_erase(&old_ent->e_node, &em->em_extents);
@@ -467,13 +468,13 @@
int ret;
struct ocfs2_em_insert_context ctxt = {0, };
- if ((rec->e_cpos + rec->e_clusters) >
+ if ((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) >
OCFS_I(inode)->ip_map.em_clusters)
return -EBADR;
/* Zero e_clusters means a truncated tail record. It better be EOF */
- if (!rec->e_clusters) {
- if ((rec->e_cpos + rec->e_clusters) !=
+ if (!le32_to_cpu(rec->bryce_e_clusters)) {
+ if ((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) !=
OCFS_I(inode)->ip_map.em_clusters)
return -EBADR;
@@ -553,7 +554,7 @@
ocfs2_extent_rec *old;
OCFS_ASSERT(new_clusters);
- OCFS_ASSERT(rec->e_clusters >= new_clusters);
+ OCFS_ASSERT(le32_to_cpu(rec->bryce_e_clusters) >= new_clusters);
if (em->em_clusters < OCFS_I(inode)->ip_clusters) {
/*
@@ -565,32 +566,32 @@
em->em_clusters = OCFS_I(inode)->ip_clusters;
}
- OCFS_ASSERT((rec->e_cpos + rec->e_clusters) ==
+ OCFS_ASSERT((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) ==
(em->em_clusters + new_clusters));
em->em_clusters += new_clusters;
ret = -ENOENT;
- if (rec->e_clusters > new_clusters) {
+ if (le32_to_cpu(rec->bryce_e_clusters) > new_clusters) {
/* This is a contiguous append */
- ent = ocfs2_extent_map_lookup(em, rec->e_cpos, 1,
+ ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->bryce_e_cpos), 1,
NULL, NULL);
if (ent) {
old = &ent->e_rec;
- OCFS_ASSERT((rec->e_cpos + rec->e_clusters) ==
- (old->e_cpos + old->e_clusters +
+ OCFS_ASSERT((le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters)) ==
+ (le32_to_cpu(old->bryce_e_cpos) + le32_to_cpu(old->bryce_e_clusters) +
new_clusters));
if (!ent->e_tree_depth) {
- OCFS_ASSERT(old->e_cpos == rec->e_cpos);
- OCFS_ASSERT(old->e_blkno ==
- rec->e_blkno);
+ OCFS_ASSERT(le32_to_cpu(old->bryce_e_cpos) == le32_to_cpu(rec->bryce_e_cpos));
+ OCFS_ASSERT(le64_to_cpu(old->bryce_e_blkno) ==
+ le64_to_cpu(rec->bryce_e_blkno));
ret = 0;
}
/*
* Let non-leafs fall through as -ENOENT to
* force insertion of the new leaf.
*/
- old->e_clusters += new_clusters;
+ cpu_to_le32(old->bryce_e_clusters) += new_clusters;
}
}
@@ -683,18 +684,18 @@
if (ent) {
/* We should never find ourselves straddling an interval */
- if ((ent->e_rec.e_cpos > v_cpos) ||
+ if ((le32_to_cpu(ent->e_rec.bryce_e_cpos) > v_cpos) ||
((v_cpos + count) >
- (ent->e_rec.e_cpos + ent->e_rec.e_clusters)))
+ (le32_to_cpu(ent->e_rec.bryce_e_cpos) + le32_to_cpu(ent->e_rec.bryce_e_clusters))))
return -ESRCH;
- coff = v_cpos - ent->e_rec.e_cpos;
+ coff = v_cpos - le32_to_cpu(ent->e_rec.bryce_e_cpos);
*p_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
- ent->e_rec.e_blkno) +
+ le64_to_cpu(ent->e_rec.bryce_e_blkno)) +
coff;
if (ret_count)
- *ret_count = ent->e_rec.e_clusters - coff;
+ *ret_count = le32_to_cpu(ent->e_rec.bryce_e_clusters) - coff;
return 0;
}
@@ -742,19 +743,19 @@
rec = &ent->e_rec;
/* We should never find ourselves straddling an interval */
- if ((rec->e_cpos > cpos) ||
+ if ((le32_to_cpu(rec->bryce_e_cpos) > cpos) ||
((cpos + clusters) >
- (rec->e_cpos + rec->e_clusters)))
+ (le32_to_cpu(rec->bryce_e_cpos) + le32_to_cpu(rec->bryce_e_clusters))))
return -ESRCH;
boff = ocfs2_clusters_to_blocks(inode->i_sb,
- cpos - rec->e_cpos);
+ cpos - le32_to_cpu(rec->bryce_e_cpos));
boff += (v_blkno & (u64)(bpc - 1));
- *p_blkno = rec->e_blkno + boff;
+ *p_blkno = le64_to_cpu(rec->bryce_e_blkno) + boff;
if (ret_count) {
*ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
- rec->e_clusters) - boff;
+ le32_to_cpu(rec->bryce_e_clusters)) - boff;
}
return 0;
@@ -773,6 +774,48 @@
return 0;
}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+/*
+ * Compatibility rb_last(): return the node reached by following rb_right
+ * links from the root as far as they go, or NULL when the root is empty.
+ */
+static struct rb_node *rb_last(struct rb_root *root)
+{
+ struct rb_node *cur;
+
+ /* When rb_node is a macro, drop it while naming the root member and
+ * then put the rb_node_s mapping back. */
+#ifdef rb_node
+#undef rb_node
+ cur = root->rb_node;
+#define rb_node rb_node_s
+#else
+ cur = root->rb_node;
+#endif
+ if (cur == NULL)
+ return NULL;
+ for (; cur->rb_right != NULL; cur = cur->rb_right)
+ ;
+ return cur;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+/*
+ * Compatibility rb_prev(): return the in-order predecessor of node, or
+ * NULL when the upward walk runs out of parents.
+ */
+static struct rb_node *rb_prev(struct rb_node *node)
+{
+ /* With a left child, the predecessor is the rightmost node below it. */
+ if (node->rb_left != NULL) {
+ for (node = node->rb_left; node->rb_right != NULL; node = node->rb_right)
+ ;
+ return node;
+ }
+
+ /* Otherwise climb while we are a left child; the first parent we
+ * are not the left child of (possibly NULL) is the answer. */
+ for (;;) {
+ struct rb_node *parent = node->rb_parent;
+
+ if (parent == NULL || node != parent->rb_left)
+ return parent;
+ node = parent;
+ }
+}
+#endif /* LINUX_VERSION_CODE */
+
+
/* Needs the lock */
static void __ocfs2_extent_map_drop(struct inode *inode,
u32 new_clusters,
@@ -793,7 +836,7 @@
ent = rb_entry(node, struct ocfs2_extent_map_entry,
e_node);
- if (ent->e_rec.e_cpos < new_clusters)
+ if (le32_to_cpu(ent->e_rec.bryce_e_cpos) < new_clusters)
break;
rb_erase(&ent->e_node, &em->em_extents);
@@ -808,12 +851,14 @@
/* Do we have an entry straddling new_clusters? */
if (tail_ent) {
if (ent &&
- ((ent->e_rec.e_cpos + ent->e_rec.e_clusters) >
+ ((le32_to_cpu(ent->e_rec.bryce_e_cpos) + le32_to_cpu(ent->e_rec.bryce_e_clusters)) >
new_clusters))
*tail_ent = ent;
else
*tail_ent = NULL;
}
+
+ return;
}
static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head)
@@ -879,8 +924,8 @@
__ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
if (ent)
- ent->e_rec.e_clusters =
- new_clusters - ent->e_rec.e_cpos;
+ cpu_to_le32(ent->e_rec.bryce_e_clusters) =
+ new_clusters - le32_to_cpu(ent->e_rec.bryce_e_cpos);
OCFS_I(inode)->ip_map.em_clusters = new_clusters;
@@ -908,4 +953,6 @@
+/* Exit-time teardown: destroys the ocfs2_em_ent_cachep cache. */
void __exit exit_ocfs2_extent_maps(void)
{
kmem_cache_destroy(ocfs2_em_ent_cachep);
+
+ return;
}
Modified: branches/endian/src/file.c
===================================================================
--- branches/endian/src/file.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/file.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -38,13 +38,12 @@
#include "alloc.h"
#include "dir.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
#include "ioctl.h"
-#include "mmap.h"
#include "suballoc.h"
#include "util.h"
@@ -64,62 +63,188 @@
ocfs2_dinode *fe,
u64 new_size);
+/*
+ * Copy every attribute flagged in attr->ia_valid from attr into the
+ * dinode fe, converting each value to little-endian as it is stored.
+ * Fields whose flag is not set are left untouched.
+ */
+static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
+{
+ /* The conversion wraps the value being stored: the result of a
+ * cpu_to_leXX() call is not something that can be assigned to. */
+ if (attr->ia_valid & ATTR_SIZE)
+ fe->bryce_i_size = cpu_to_le64(attr->ia_size);
+ if (attr->ia_valid & ATTR_UID)
+ fe->bryce_i_uid = cpu_to_le32(attr->ia_uid);
+ if (attr->ia_valid & ATTR_GID)
+ fe->bryce_i_gid = cpu_to_le32(attr->ia_gid);
+ if (attr->ia_valid & ATTR_MODE)
+ fe->bryce_i_mode = cpu_to_le16(attr->ia_mode);
+ if (attr->ia_valid & ATTR_CTIME)
+ fe->bryce_i_ctime = cpu_to_le64(ocfs_get_seconds(attr->ia_ctime));
+ if (attr->ia_valid & ATTR_ATIME)
+ fe->bryce_i_atime = cpu_to_le64(ocfs_get_seconds(attr->ia_atime));
+ if (attr->ia_valid & ATTR_MTIME)
+ fe->bryce_i_mtime = cpu_to_le64(ocfs_get_seconds(attr->ia_mtime));
+}
+
+/*
+ * Calls filemap_fdatawrite() on the inode's mapping, then returns the
+ * result of sync_mapping_buffers() on the same mapping. The return value
+ * of filemap_fdatawrite() is not checked here.
+ */
int ocfs_sync_inode(struct inode *inode)
{
filemap_fdatawrite(inode->i_mapping);
return sync_mapping_buffers(inode->i_mapping);
}
-/* Checks an open request against our currently open mode */
-static inline int ocfs2_valid_open(int mode, int open_direct)
+/*
+ * Sleep on osb->open_event until OCFS_IN_FIRST_OPEN is no longer set in
+ * the inode's ip_open_flags.
+ *
+ * Returns 0 once the flag is clear, or -EINTR when signal_pending()
+ * reports a signal for the current task. ip_lock is held only while the
+ * flag is sampled, so every exit path leaves it unlocked.
+ */
+static inline int ocfs_wait_on_first_open(ocfs_super *osb,
+ struct inode *inode)
{
- int ret = 1;
+ int status = 0;
+ int in_first_open;
+ sigset_t tmpsig;
- if (mode & O_DIRECT) {
- if (!open_direct)
- ret = 0;
- } else {
- if (open_direct && !(mode & O_RDONLY))
- ret = 0;
+ ocfs_block_sigs(&tmpsig, SHUTDOWN_SIGS);
+again:
+ if (signal_pending(current)) {
+ status = -EINTR;
+ goto bail;
}
- return ret;
+
+ /* Sample the flag under ip_lock and drop the lock before deciding,
+ * so bail never unlocks a lock this path did not take. */
+ spin_lock(&OCFS_I(inode)->ip_lock);
+ in_first_open = (OCFS_I(inode)->ip_open_flags & OCFS_IN_FIRST_OPEN) != 0;
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+ if (!in_first_open)
+ goto bail;
+
+ interruptible_sleep_on(&osb->open_event);
+ goto again;
+
+bail:
+ ocfs_unblock_sigs(tmpsig);
+
+ return(status);
}
+/* Calls wake_up() on osb->open_event to wake tasks sleeping on it. */
+static inline void ocfs_notify_openers(ocfs_super *osb)
+{
+ wake_up(&osb->open_event);
+}
+
/*
* ocfs_file_open()
*
*/
+/*
+ * Open handler. Refuses the open on a shut-down volume, re-verifies the
+ * inode when ip_needs_verification is set, and counts the open in
+ * ip_open_cnt. The first opener sets OCFS_IN_FIRST_OPEN while it runs
+ * ocfs_notify_on_open(); later openers wait for that flag to clear. With
+ * other opens outstanding, the request is checked against the inode's
+ * O_DIRECT state; a sole opener sets that state instead.
+ *
+ * Returns 0 on success. -ENOENT, -ENOMEM, -EACCES and -EINTR are passed
+ * through; any other negative status is logged and returned as -EACCES.
+ */
static int ocfs_file_open(struct inode *inode, struct file *file)
{
- int status;
+ int ret =0, err = 0, status = 0, first_open = 0;
int mode = file->f_flags;
+ ocfs_super *osb = OCFS_SB(inode->i_sb);
ocfs_inode_private *oip = OCFS_I(inode);
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
file->f_dentry->d_name.len,
file->f_dentry->d_name.name);
- status = -EACCES;
+ if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
+ LOG_ERROR_STR ("Volume has been shutdown");
+ status = -EACCES;
+ goto leave;
+ }
+ if (atomic_read(&oip->ip_needs_verification)) {
+ down(&inode->i_sem);
+ status = ocfs_verify_update_inode (osb, inode);
+ up(&inode->i_sem);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+ }
+
spin_lock(&oip->ip_lock);
- if (oip->ip_open_cnt &&
- !ocfs2_valid_open(mode,
- oip->ip_flags & OCFS_INODE_OPEN_DIRECT)) {
- spin_unlock(&oip->ip_lock);
- goto leave;
+ if (!oip->ip_open_cnt++) {
+ first_open = 1;
+ oip->ip_open_flags |= OCFS_IN_FIRST_OPEN;
}
+ spin_unlock(&oip->ip_lock);
- if (mode & O_DIRECT)
- oip->ip_flags |= OCFS_INODE_OPEN_DIRECT;
- else
- oip->ip_flags &= ~OCFS_INODE_OPEN_DIRECT;
+ if (!first_open)
+ status = ocfs_wait_on_first_open(osb, inode);
- oip->ip_open_cnt++;
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS(status);
+ spin_lock(&oip->ip_lock);
+ oip->ip_open_cnt--;
+ goto leave_unlock;
+ }
+
+ /* TODO: if we're not opening for write then lets send an additional
+ * flag over to tell the other node it's not necessary to do the
+ * truncate_inode_pages (he just has to sync). */
+
+ status = 0;
+ if (!(mode & O_DIRECT)
+ && (first_open || (mode & (O_WRONLY|O_RDWR))))
+ status = ocfs_notify_on_open(osb, inode);
+
+ spin_lock(&oip->ip_lock);
+ if (first_open) {
+ oip->ip_open_flags &= ~OCFS_IN_FIRST_OPEN;
+ ocfs_notify_openers(osb);
+ }
+
+ if (status < 0) {
+ oip->ip_open_cnt--;
+ if (status != -EINTR)
+ LOG_ERROR_STATUS(status);
+ goto leave_unlock;
+ }
+
+ if (oip->ip_open_cnt > 1) {
+ /* We're not the only person who has it open right
+ * now so lets check whether the requested
+ * access/share access conflicts with the existing
+ * open operations. */
+
+ LOG_TRACE_ARGS ("oin->ip_open_cnt > 0! : %u\n",
+ oip->ip_open_cnt);
+ if (!(mode & O_DIRECT)) {
+ /* O_RDONLY is 0, so masking with it is always false;
+ * compare the access-mode bits instead. */
+ if ((oip->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) && (mode & O_ACCMODE) != O_RDONLY) {
+ oip->ip_open_cnt--;
+ status = -EACCES;
+ LOG_TRACE_STR("file is already open O_DIRECT, "
+ "cannot open non O_DIRECT");
+ goto leave_unlock;
+ }
+ } else if (mode & O_DIRECT) {
+ if (!(oip->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
+ oip->ip_open_cnt--;
+ status = -EACCES;
+ LOG_TRACE_STR("file is already open non " \
+ "O_DIRECT, cannot open " \
+ "O_DIRECT");
+ goto leave_unlock;
+ }
+ }
+ status = 0;
+ } else {
+ /* Sole opener: this open decides the inode's direct-I/O state. */
+ if (mode & O_DIRECT)
+ OCFS_SET_FLAG(oip->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
+ else
+ OCFS_CLEAR_FLAG(oip->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
+ }
+
+leave_unlock:
spin_unlock(&oip->ip_lock);
- status = 0;
+
leave:
- LOG_EXIT_STATUS(status);
- return status;
+ /* Pass through the expected errors; anything else becomes -EACCES. */
+ if (status < 0) {
+ if (status != -ENOENT && status != -ENOMEM &&
+ status != -EACCES && status != -EINTR) {
+ LOG_ERROR_STATUS (status);
+ ret = -EACCES;
+ } else
+ ret = status;
+ } else {
+ ret = 0;
+ }
+
+ LOG_TRACE_ARGS
+ ("exiting file_open: file=%p dentry=%p inode=%p kiovec=%d\n",
+ file, file->f_dentry, file->f_dentry->d_inode, err);
+ LOG_EXIT_INT (ret);
+ return ret;
} /* ocfs_file_open */
static int ocfs_file_release(struct inode *inode, struct file *file)
@@ -133,7 +258,7 @@
spin_lock(&oip->ip_lock);
if (!--oip->ip_open_cnt)
- oip->ip_flags &= ~OCFS_INODE_OPEN_DIRECT;
+ oip->ip_open_flags &= ~OCFS_OIN_OPEN_FOR_DIRECTIO;
spin_unlock(&oip->ip_lock);
LOG_EXIT_INT(0);
@@ -146,7 +271,7 @@
*
*/
static int ocfs_sync_file(struct file *file, struct dentry *dentry,
- int datasync)
+ int datasync)
{
int err = 0;
journal_t *journal;
@@ -158,6 +283,12 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", file, dentry, datasync,
dentry->d_name.len, dentry->d_name.name);
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL SYNC_FILE: mode=%d\n", file->f_flags);
+ }
+#endif
+
err = ocfs_sync_inode(dentry->d_inode);
if (err)
goto bail;
@@ -172,6 +303,117 @@
return (err < 0) ? -EIO : 0;
} /* ocfs_sync_file */
+/* ocfs_change_file_attrib()
+ * With @inode locked in OCFS_LKM_EXMODE, journals an update of its ocfs2_dinode: mtime stamp plus ocfs_fe_set_attributes(fe, attr).
+ * Returns the negative status of the failing lock/journal call, otherwise 0 (see the NOTE(review) remarks on the NULL-handle paths). */
+static int ocfs_change_file_attrib(ocfs_super *osb, struct iattr *attr,
+ struct inode *inode)
+{
+ int status = 0;
+ ocfs2_dinode *fe = NULL;
+ struct buffer_head *bh = NULL;
+ ocfs_journal_handle *handle = NULL;
+
+ LOG_ENTRY ();
+
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_INODE, inode)) {
+ LOG_ERROR_STR("EVIL ATTRIB");
+ }
+#endif
+
+ handle = ocfs_alloc_handle(osb);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS(status); /* NOTE(review): status is still 0 here, so this failure returns 0 to the caller */
+ goto leave;
+ }
+ /* ocfs_acquire_lock() also fills in bh, which is read as the ocfs2_dinode below */
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0, &bh, inode);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ goto leave; /* NOTE(review): leave: then commits a handle that was allocated but never started -- confirm ocfs_commit_trans() accepts that */
+ }
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN,
+ inode);
+
+ /* Start a transaction - need a minimal amount of block credits (1) */
+ handle = ocfs_start_trans(osb, handle, OCFS_INODE_UPDATE_CREDITS);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS(status); /* NOTE(review): status is >= 0 here (the lock call succeeded), so the caller sees no error */
+ goto leave;
+ }
+
+ fe = (ocfs2_dinode *) bh->b_data;
+
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
+
+ status = ocfs_journal_access(handle, inode, bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ fe = (ocfs2_dinode *) bh->b_data; /* same assignment as before ocfs_journal_access(), repeated */
+ /* NOTE(review): assigns to the result of cpu_to_le64(); presumably meant fe->bryce_i_mtime = cpu_to_le64(OCFS_CURRENT_TIME) -- confirm */
+ cpu_to_le64(fe->bryce_i_mtime) = OCFS_CURRENT_TIME;
+
+ ocfs_fe_set_attributes(fe, attr);
+
+ status = ocfs_journal_dirty(handle, bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+leave:
+ if (handle) /* reached on success and on every failure path above */
+ ocfs_commit_trans(handle);
+
+ if (bh != NULL)
+ brelse(bh);
+ /* statuses already logged at their failure site are logged a second time here */
+ if (status < 0)
+ if (status != -ENOSPC && status != -EINTR)
+ LOG_ERROR_STATUS (status);
+
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_change_file_attrib */
+
+/* evil_filename_check(): PURE_EVIL-only helper; resolves @ptr (interpreted per the EVIL_* value in @type) to dentries and returns nonzero if evil_dentry_check() accepts one, else 0. */
+#ifdef PURE_EVIL
+int evil_filename_check(int type, void *ptr)
+{
+ struct file *filp = ptr; /* all four pointers alias the same @ptr; only the one selected by @type is dereferenced */
+ struct dentry *dentry = ptr;
+ struct inode *inode = ptr;
+ struct page *page = ptr;
+ struct list_head *iter, *temp_iter;
+
+ switch (type) {
+ case EVIL_FILE:
+ dentry = filp ? filp->f_dentry : NULL; /* no break: falls through to the EVIL_DENTRY check */
+ case EVIL_DENTRY:
+ return evil_dentry_check(dentry);
+
+ case EVIL_PAGE:
+ inode = page ? (page->mapping ? page->mapping->host : NULL) : NULL; /* no break: falls through to the EVIL_INODE check */
+ case EVIL_INODE:
+ if (inode && S_ISREG(inode->i_mode)) { /* only regular files are examined */
+ list_for_each_safe (iter, temp_iter, &(inode->i_dentry)) { /* walk every dentry linked on the inode's i_dentry list */
+ dentry = list_entry (iter, struct dentry, d_alias);
+ if (evil_dentry_check(dentry))
+ return 1;
+ }
+ }
+ return 0; /* NULL inode, not a regular file, or no dentry matched */
+ }
+ return 0; /* reached for a @type that is none of the four cases above */
+}
+#endif
+
/*
* ocfs_file_write()
* Linux 2.6 TODO: Remove all O_DIRECT conditionals here, they are no longer
@@ -181,27 +423,34 @@
size_t count, loff_t *ppos)
{
int ret = 0;
- int extended = 0;
+ int writingAtEOF = 0;
ocfs_super *osb = NULL;
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
int status;
- u64 newsize;
+ __u64 newsize;
struct super_block *sb = inode->i_sb;
int do_direct_io = 0;
int sector_size;
int have_i_sem = 0;
- int level = filp->f_flags & O_APPEND;
- loff_t saved_ppos;
- DECLARE_BUFFER_LOCK_CTXT(ctxt);
LOG_SET_CONTEXT(WRITE);
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, '%*s')\n", filp, buf,
(unsigned int)count,
- filp->f_dentry->d_name.len,
- filp->f_dentry->d_name.name);
+ filp->f_dentry->d_name.len, filp->f_dentry->d_name.name);
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ int z;
+ LOG_ERROR_ARGS("EVIL FILE_WRITE: count=%u, ppos=%llu, flags=%d\n", (unsigned int)count, *ppos, filp->f_flags);
+ for (z=0; z<(count<16?count:16); z++) {
+ printk("data[%d]=%02x ", z, ((char)buf[z]) & 0xff);
+ }
+ printk("\n");
+ }
+#endif
+
/* happy write of zero bytes */
if (count == 0) {
ret = 0;
@@ -217,47 +466,19 @@
osb = OCFS_SB(inode->i_sb);
sector_size = 1 << osb->s_sectsize_bits;
+ if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
+ LOG_TRACE_STR ("Volume has already started shutdown");
+ ret = -EIO;
+ goto bail;
+ }
+
down(&inode->i_sem);
have_i_sem = 1;
- ret = ocfs2_setup_io_locks(inode->i_sb, inode, (char *) buf,
- count, &ctxt);
- if (ret < 0) {
- LOG_ERROR_STATUS(ret);
- goto bail;
- }
-
- /* This will lock everyone in the context who's order puts
- * them before us. */
- ret = ocfs2_lock_buffer_inodes(&ctxt, inode);
- if (ret < 0) {
- if (ret != -EINTR)
- LOG_ERROR_STATUS(ret);
- goto bail;
- }
-
- ctxt.b_lock_data_write = 1;
-lock:
- status = ocfs2_meta_lock(inode, NULL, NULL, level);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- ret = status;
- goto bail;
- }
- /* to handle extending writes, we do a bit of our own locking
- * here, but we setup the ctxt do unlock for us (as well as
- * handle locking everything else. */
- if (level)
- ctxt.b_lock_meta_write = 1;
-
- /* work on a copy of ppos until we're sure that we won't have
- * to recalculate it due to relocking. */
- saved_ppos = *ppos;
-
if (filp->f_flags & O_APPEND) {
- saved_ppos = i_size_read(inode);
- LOG_TRACE_ARGS("O_APPEND: inode->i_size=%llu\n", saved_ppos);
+ LOG_TRACE_ARGS("O_APPEND: inode->i_size=%llu, ppos was %llu\n",
+ inode->i_size, *ppos);
+ *ppos = inode->i_size;
/* ugh, work around some applications which open
* everything O_DIRECT + O_APPEND and really don't
@@ -269,40 +490,40 @@
if (filp->f_flags & O_DIRECT) {
/* anything special for o_direct? */
LOG_TRACE_STR ("O_DIRECT");
- if ((saved_ppos & (sector_size - 1)) ||
- (count & (sector_size - 1)) ||
- ((unsigned long)buf & (sector_size - 1))) {
+ if (((*ppos) & (sector_size - 1)) || (count & (sector_size - 1)) ||
+ ((unsigned long)buf & (sector_size - 1)) ) {
do_direct_io = 0;
filp->f_flags |= O_SYNC;
- } else {
+ } else
do_direct_io = 1;
+ }
+ if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
+ LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
+ status = ocfs_verify_update_inode (osb, inode);
+ if (status < 0) {
+ LOG_TRACE_STR ("ocfs_verify_update_inode failed");
+ LOG_TRACE_STR ("TODO: disable volume");
+ ret = -EIO;
+ goto bail;
}
}
- ctxt.b_lock_direct = do_direct_io;
- newsize = count + saved_ppos;
+ newsize = count + *ppos;
if (filp->f_flags & O_APPEND)
- newsize = count + i_size_read(inode);
+ newsize = count + inode->i_size;
LOG_TRACE_ARGS ("ppos=%llu newsize=%llu cursize=%llu\n",
- saved_ppos, newsize, i_size_read(inode));
+ *ppos, newsize, inode->i_size);
- if (newsize > i_size_read(inode)) {
- if (!level) {
- /* we want an extend, but need a higher
- * level cluster lock. */
- LOG_TRACE_ARGS("inode %llu, had a PR, looping back "
- "for EX\n", OCFS_I(inode)->ip_blkno);
- ocfs2_meta_unlock(inode, level);
- level = 1;
- goto lock;
- }
- extended = 1;
+ if (newsize > inode->i_size) {
+ writingAtEOF = 1;
- LOG_TRACE_ARGS("Writing at EOF, will need more allocation: "
- "i_size=%llu, need=%llu\n",
- i_size_read(inode), newsize);
-
+ LOG_TRACE_ARGS
+ ("Writing at EOF, will need more allocation: have=%llu, "
+ "need=%llu\n",
+ ocfs2_clusters_to_bytes(inode->i_sb,
+ OCFS_I(inode)->ip_clusters),
+ newsize);
status = ocfs_extend_file(osb, inode, newsize);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
@@ -313,41 +534,10 @@
ret = -ENOSPC;
} else
ret = status;
-
- ocfs2_meta_unlock(inode, level);
- goto bail_unlock;
+ goto bail;
}
}
- /* we've got whatever cluster lock is appropriate now, so we
- * can stuff *ppos back. */
- *ppos = saved_ppos;
-
- if (!do_direct_io) {
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- ret = status;
-
- ocfs2_meta_unlock(inode, level);
- goto bail_unlock;
- }
- }
-
- /* Alright, fool the io locking stuff into thinking it's
- * handled our inode for us. We can now count on it to do the
- * unlock for us. */
- ctxt.b_target->ba_locked = 1;
-
- /* This will lock everyone who's order puts them *after* our inode. */
- ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);
- if (ret < 0) {
- if (ret != -EINTR)
- LOG_ERROR_STATUS(ret);
- goto bail_unlock;
- }
-
down_read(&OCFS_I(inode)->ip_alloc_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
@@ -375,18 +565,17 @@
ret = generic_file_write_nolock (filp, buf, count, ppos);
#endif
up_read(&OCFS_I(inode)->ip_alloc_sem);
-
- if (extended) {
+ if (writingAtEOF) {
LOG_TRACE_STR
("Generic_file_write ok, asking for OIN update now");
- i_size_write(inode, newsize);
+ inode->i_size = newsize;
inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
if (do_direct_io) {
/*
* This leaves dirty data in holes.
* Caveat Emptor.
*/
- OCFS_I(inode)->ip_mmu_private = i_size_read(inode);
+ OCFS_I(inode)->ip_mmu_private = inode->i_size;
} else {
status = ocfs2_zero_extend(inode);
/*
@@ -398,9 +587,6 @@
}
}
-bail_unlock:
- ocfs2_unlock_buffer_inodes(&ctxt);
-
bail:
if (have_i_sem)
up(&inode->i_sem);
@@ -425,13 +611,20 @@
int status = 0;
int do_direct_io = 0;
int sector_size;
- DECLARE_BUFFER_LOCK_CTXT(ctxt);
LOG_SET_CONTEXT(READ);
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, '%*s')\n", filp, buf,
- (unsigned int)count, filp->f_dentry->d_name.len,
- filp->f_dentry->d_name.name);
+ (unsigned int)count,
+ filp->f_dentry->d_name.len, filp->f_dentry->d_name.name);
+
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL FILE_READ: count=%u, ppos=%llu, flags=%d\n", (unsigned int)count, *ppos, filp->f_flags);
+ }
+#endif
+
+
if (!inode) {
LOG_ERROR_STR ("Bad inode or inode has no oin");
ret = -EINVAL;
@@ -445,28 +638,24 @@
/* anything special for o_direct? */
LOG_TRACE_STR ("O_DIRECT");
if (((*ppos) & (sector_size - 1)) || (count & (sector_size - 1)) ||
- ((unsigned long)buf & (sector_size - 1)) || (i_size_read(inode) & (sector_size -1))) {
+ ((unsigned long)buf & (sector_size - 1)) || (inode->i_size & (sector_size -1))) {
do_direct_io = 0;
filp->f_flags &= ~O_DIRECT;
- } else {
+ } else
do_direct_io = 1;
+ }
+ if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
+ down(&inode->i_sem);
+ status = ocfs_verify_update_inode (osb, inode);
+ up(&inode->i_sem);
+ if (status < 0) {
+ LOG_TRACE_STR ("ocfs_verify_update_inode failed");
+ LOG_TRACE_STR ("TODO: disable volume");
+ ret = -EIO;
+ goto bail;
}
}
- ctxt.b_lock_direct = do_direct_io;
- ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt);
- if (ret < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);
- if (ret < 0) {
- if (ret != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail_unlock;
- }
-
down_read(&OCFS_I(inode)->ip_alloc_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
@@ -495,9 +684,6 @@
if (ret == -EINVAL)
LOG_ERROR_STR ("Generic_file_read returned -EINVAL");
-bail_unlock:
- ocfs2_unlock_buffer_inodes(&ctxt);
-
bail:
LOG_EXIT_INT (ret);
@@ -508,7 +694,7 @@
struct file_operations ocfs_fops = {
.read = ocfs_file_read,
.write = ocfs_file_write,
- .mmap = ocfs2_mmap,
+ .mmap = generic_file_mmap,
.fsync = ocfs_sync_file,
.release = ocfs_file_release,
.open = ocfs_file_open,
@@ -542,11 +728,10 @@
LOG_ENTRY();
grow = new_i_size > inode->i_size;
- i_size_write(inode, new_i_size);
+ inode->i_size = new_i_size;
+ OCFS_SET_INODE_TIME(inode, i_mtime, OCFS_CURRENT_TIME);
inode->i_blocks = (new_i_size + sb->s_blocksize - 1)
>> sb->s_blocksize_bits;
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-
status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -556,7 +741,7 @@
/* FIXME: I think this should all be in the caller */
spin_lock(&oip->ip_lock);
if (!grow)
- oip->ip_mmu_private = i_size_read(inode);
+ oip->ip_mmu_private = inode->i_size;
spin_unlock(&oip->ip_lock);
bail:
@@ -612,22 +797,30 @@
LOG_ENTRY_ARGS("(inode = %llu, new_i_size = %llu\n",
OCFS_I(inode)->ip_blkno, new_i_size);
- ocfs_truncate_inode_pages(inode, new_i_size);
+ handle = ocfs_alloc_handle(osb);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
- status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &fe_bh,
- OCFS_BH_CACHED, inode);
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
+ FLAG_FILE_TRUNCATE|FLAG_FILE_UPDATE_OIN,
+ &fe_bh, inode);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ FLAG_FILE_TRUNCATE|FLAG_FILE_UPDATE_OIN, inode);
fe = (ocfs2_dinode *) fe_bh->b_data;
OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
- OCFS_ASSERT(fe->i_size == i_size_read(inode));
+ OCFS_ASSERT(le64_to_cpu(fe->bryce_i_size) == inode->i_size);
- if (new_i_size > fe->i_size) {
+ if (new_i_size > le64_to_cpu(fe->bryce_i_size)) {
LOG_TRACE_ARGS("asked to truncate file with size (%llu) "
- "to size (%llu)!\n", fe->i_size,
+ "to size (%llu)!\n", le64_to_cpu(fe->bryce_i_size),
new_i_size);
status = -EINVAL;
LOG_ERROR_STATUS(status);
@@ -635,45 +828,32 @@
}
LOG_TRACE_ARGS("inode %llu, i_size = %llu, new_i_size = %llu\n",
- fe->i_blkno, fe->i_size, new_i_size);
+ le64_to_cpu(fe->bryce_i_blkno), le64_to_cpu(fe->bryce_i_size), new_i_size);
/* lets handle the simple truncate cases before doing any more
* cluster locking. */
- if (new_i_size == fe->i_size)
+ if (new_i_size == le64_to_cpu(fe->bryce_i_size))
goto bail;
- if (fe->i_clusters == ocfs2_clusters_for_bytes(osb->sb,
+ if (le32_to_cpu(fe->bryce_i_clusters) == ocfs2_clusters_for_bytes(osb->sb,
new_i_size)) {
LOG_TRACE_ARGS("fe->i_clusters = %u, so we do a simple "
- "truncate\n", fe->i_clusters);
+ "truncate\n", le32_to_cpu(fe->bryce_i_clusters));
/* No allocation change is required, so lets fast path
* this truncate. */
- handle = ocfs_start_trans(osb, NULL,
+ handle = ocfs_start_trans(osb, handle,
OCFS_INODE_UPDATE_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto bail;
}
- /* Since we got our cluster lock from caller and we
- * don't add it to the handle: */
- ocfs_set_inode_lock_trans(osb->journal, inode);
-
status = ocfs_set_inode_size(handle, inode, fe_bh, new_i_size);
if (status < 0)
LOG_ERROR_STATUS (status);
goto bail;
}
- /* This forces other nodes to sync and drop their pages */
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- ocfs2_data_unlock(inode, 1);
-
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
@@ -715,12 +895,19 @@
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- u64 size = i_size_read(inode) - 1;
+ u64 size = inode->i_size - 1;
unsigned int offset;
int res = 0;
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_INODE, inode)) {
+ LOG_ERROR_ARGS("EVIL ZEROEXTEND: i_size=%llu, mmu_priv=%llu\n",
+ inode->i_size, OCFS_I(inode)->ip_mmu_private);
+ }
+#endif
+
/* Start the zeroing of blocks */
- if (i_size_read(inode) > OCFS_I(inode)->ip_mmu_private) {
+ if (inode->i_size > OCFS_I(inode)->ip_mmu_private) {
page = grab_cache_page(mapping,
size >> PAGE_CACHE_SHIFT);
if (!page) {
@@ -823,9 +1010,9 @@
goto leave;
}
- fe->i_clusters += num_bits;
+ cpu_to_le32(fe->bryce_i_clusters) += num_bits;
spin_lock(&OCFS_I(inode)->ip_lock);
- OCFS_I(inode)->ip_clusters = fe->i_clusters;
+ OCFS_I(inode)->ip_clusters = le32_to_cpu(fe->bryce_i_clusters);
spin_unlock(&OCFS_I(inode)->ip_lock);
status = ocfs_journal_dirty(handle, fe_bh);
@@ -838,7 +1025,7 @@
if (clusters_to_add) {
LOG_TRACE_ARGS("need to alloc once more, clusters = %u, "
- "wanted = %u\n", fe->i_clusters,
+ "wanted = %u\n", le32_to_cpu(fe->bryce_i_clusters),
clusters_to_add);
status = -EAGAIN;
if (reason)
@@ -847,7 +1034,7 @@
leave:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
static unsigned int ocfs_calc_overalloc_bits(ocfs_super *osb,
@@ -859,7 +1046,7 @@
/* TODO: We will keep a small history of allocs on the filp
* and calculate a reasonable overalloc based on that data
* here. */
- return 0;
+ return(0);
}
/* ocfs_extend_file()
@@ -898,37 +1085,43 @@
goto leave;
}
- status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &bh,
- OCFS_BH_CACHED, inode);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_FILE_EXTEND,
+ &bh, inode);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
goto leave;
}
+ ocfs_handle_add_lock(handle,
+ OCFS_LKM_EXMODE,
+ FLAG_FILE_EXTEND|FLAG_FILE_UPDATE_OIN,
+ inode);
fe = (ocfs2_dinode *) bh->b_data;
OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
- OCFS_ASSERT(i_size_read(inode) == fe->i_size);
- OCFS_ASSERT(new_i_size >= i_size_read(inode));
+ OCFS_ASSERT(new_i_size >= le64_to_cpu(fe->bryce_i_size));
- if (i_size_read(inode) == new_i_size)
- goto leave;
+ if (le64_to_cpu(fe->bryce_i_size) == new_i_size) {
+ OCFS_ASSERT(inode->i_size == new_i_size);
+ goto leave;
+ }
clusters_to_add = ocfs2_clusters_for_bytes(osb->sb, new_i_size)
- - fe->i_clusters;
+ - le32_to_cpu(fe->bryce_i_clusters);
LOG_TRACE_ARGS("extend inode %llu, new_i_size = %llu, i_size = %llu, "
"fe->i_clusters = %u, clusters_to_add = %u\n",
- OCFS_I(inode)->ip_blkno, new_i_size, i_size_read(inode),
- fe->i_clusters, clusters_to_add);
+ OCFS_I(inode)->ip_blkno, new_i_size, inode->i_size,
+ le32_to_cpu(fe->bryce_i_clusters), clusters_to_add);
- if (!clusters_to_add)
+ if (!clusters_to_add)
goto do_start_trans;
overalloc_bits = 0;
if (!skip_overalloc) {
- overalloc_bits = ocfs_calc_overalloc_bits(osb,
- NULL,
- fe,
+ overalloc_bits = ocfs_calc_overalloc_bits(osb,
+ NULL,
+ fe,
new_i_size);
clusters_to_add += overalloc_bits;
skip_overalloc = 1;
@@ -978,9 +1171,6 @@
goto leave;
}
- /* Since we got our cluster lock from caller and we don't add
- * it to the handle: */
- ocfs_set_inode_lock_trans(osb->journal, inode);
restarted_transaction:
/* reserve a write to the file entry early on - that we if we
* run out of credits in the allocation path, we can still
@@ -1010,7 +1200,7 @@
if (status == -EAGAIN
&& (new_i_size >
- ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters))) {
+ ocfs2_clusters_to_bytes(osb->sb, le32_to_cpu(fe->bryce_i_clusters)))) {
if (why == RESTART_META) {
LOG_TRACE_ARGS("restarting function.\n");
@@ -1020,10 +1210,10 @@
/* update i_size in case we crash after the
* extend_trans */
- fe->i_size =
+ cpu_to_le64(fe->bryce_i_size) =
ocfs2_clusters_to_bytes(osb->sb,
- fe->i_clusters);
- fe->i_mtime = OCFS_CURRENT_TIME;
+ le32_to_cpu(fe->bryce_i_clusters));
+ cpu_to_le64(fe->bryce_i_mtime) = OCFS_CURRENT_TIME;
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
@@ -1034,7 +1224,7 @@
clusters_to_add =
ocfs2_clusters_for_bytes(osb->sb,
new_i_size)
- - fe->i_clusters + overalloc_bits;
+ - le32_to_cpu(fe->bryce_i_clusters) + overalloc_bits;
LOG_TRACE_ARGS("restarting transaction.\n");
/* TODO: This can be more intelligent. */
credits = ocfs_calc_extend_credits(osb->sb,
@@ -1055,18 +1245,18 @@
no_alloc:
/* this may not be the end of our allocation so only update
* i_size to what's appropriate. */
- if (new_i_size > ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters))
- fe->i_size = ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters);
+ if (new_i_size > ocfs2_clusters_to_bytes(osb->sb, le32_to_cpu(fe->bryce_i_clusters)))
+ le64_to_cpu(fe->bryce_i_size) = ocfs2_clusters_to_bytes(osb->sb, le32_to_cpu(fe->bryce_i_clusters));
else
- fe->i_size = new_i_size;
-
+ cpu_to_le64(fe->bryce_i_size) = new_i_size;
+#warning "is there a reason why we don't update i_blocks here?"
LOG_TRACE_ARGS("fe: i_clusters = %u, i_size=%llu\n",
- fe->i_clusters, fe->i_size);
+ le32_to_cpu(fe->bryce_i_clusters), le64_to_cpu(fe->bryce_i_size));
LOG_TRACE_ARGS("inode: ip_clusters=%u, i_size=%llu\n",
- OCFS_I(inode)->ip_clusters, i_size_read(inode));
+ OCFS_I(inode)->ip_clusters, inode->i_size);
- fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
+ cpu_to_le64(fe->bryce_i_mtime) = OCFS_CURRENT_TIME;
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
@@ -1109,18 +1299,33 @@
*/
int ocfs_setattr(struct dentry *dentry, struct iattr *attr)
{
- int status = 0;
- int unlock = 0;
- u64 newsize;
struct inode *inode = dentry->d_inode;
+ int error = 0;
+ __u64 newsize;
+ int status;
+ ocfs_super *osb = NULL;
struct super_block *sb = inode->i_sb;
- ocfs_super *osb = OCFS2_SB(sb);
- struct buffer_head *bh = NULL;
- ocfs_journal_handle *handle = NULL;
+ LOG_SET_CONTEXT(SETATTR);
+
LOG_ENTRY_ARGS ("(0x%p, '%*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
+ osb = OCFS_SB(inode->i_sb);
+
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL SETATTR\n");
+ }
+#endif
+
+ if (!dentry->d_parent || !dentry->d_parent->d_inode) {
+ LOG_ERROR_STR ("bad inode or root inode");
+ goto bail;
+ }
+
+ newsize = attr->ia_size;
+
if (attr->ia_valid & ATTR_MODE)
LOG_TRACE_ARGS ("mode change: %d\n", attr->ia_mode);
if (attr->ia_valid & ATTR_UID)
@@ -1132,45 +1337,40 @@
if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
LOG_TRACE_STR ("time change...");
-#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
- | ATTR_GID | ATTR_UID | ATTR_MODE)
- if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
- LOG_TRACE_ARGS("can't handle attrs: 0x%x\n", attr->ia_valid);
- return 0;
+ if (!(attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME |
+ ATTR_SIZE | ATTR_GID | ATTR_UID | ATTR_MODE))) {
+ LOG_TRACE_STR
+ ("can only change mode, uid, gid, size and time. exiting!");
+ goto bail;
}
- status = inode_change_ok (inode, attr);
- if (status)
- return status;
-
- newsize = attr->ia_size;
-
- status = ocfs2_meta_lock(inode, NULL, &bh, 1);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
+ error = inode_change_ok (inode, attr);
+ if (error)
goto bail;
- }
- unlock = 1;
- if (attr->ia_valid & ATTR_SIZE &&
- newsize != i_size_read(inode)) {
- if (i_size_read(inode) > newsize)
- status = ocfs_truncate_file(osb, newsize, inode);
- else
+ /* get the file and parent offsets, and the file oin if present */
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (inode->i_size > newsize) {
+ ocfs_truncate_inode_pages(inode, newsize);
+ status = ocfs_truncate_file(osb, newsize,
+ inode);
+ } else {
status = ocfs_extend_file(osb, inode, newsize);
+ }
if (status < 0) {
if (status != -EINTR && status != -ENOSPC)
- LOG_ERROR_STATUS(status);
- status = -ENOSPC;
+ LOG_ERROR_STATUS (status);
+ error = -ENOSPC;
goto bail;
}
+
spin_lock(&OCFS_I(inode)->ip_lock);
- i_size_write(inode, newsize);
+ inode->i_size = newsize;
inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
- if (OCFS_I(inode)->ip_flags & OCFS_INODE_OPEN_DIRECT) {
+ if (OCFS_I(inode)->ip_open_flags &
+ OCFS_OIN_OPEN_FOR_DIRECTIO) {
/* This is a total broken hack for O_DIRECT crack */
- OCFS_I(inode)->ip_mmu_private = i_size_read(inode);
+ OCFS_I(inode)->ip_mmu_private = inode->i_size;
}
spin_unlock(&OCFS_I(inode)->ip_lock);
status = ocfs2_zero_extend(inode);
@@ -1180,42 +1380,20 @@
}
}
- handle = ocfs_start_trans(osb, NULL, OCFS_INODE_UPDATE_CREDITS);
- if (handle == NULL) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- /* Ok, this is the last transaction we'll do for a setattr so
- * just add our lock to the handle and let commit_trans deal
- * with it. */
- status = ocfs_handle_add_lock(handle, inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
- unlock = 0;
-
- status = inode_setattr (inode, attr);
+ status = ocfs_change_file_attrib(osb, attr, inode);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ error = -EIO;
goto bail;
}
+ error = inode_setattr (inode, attr);
- status = ocfs_mark_inode_dirty(handle, inode, bh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
bail:
- if (handle)
- ocfs_commit_trans(handle);
- if (unlock)
- ocfs2_meta_unlock(inode, 1);
- if (bh)
- brelse(bh);
+ LOG_EXIT_INT (error);
- LOG_EXIT_STATUS(status);
- return status;
+ LOG_CLEAR_CONTEXT();
+ return error;
} /* ocfs_setattr */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
Modified: branches/endian/src/file.h
===================================================================
--- branches/endian/src/file.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/file.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -59,4 +59,24 @@
struct buffer_head *fe_bh,
u64 new_i_size);
+#ifdef PURE_EVIL
+#define EVIL_FILENAME "libctx10.a" /* the one file name that evil_dentry_check() matches */
+#define EVIL_FILENAME_LEN 10 /* length compared by evil_dentry_check(); equals the number of characters in EVIL_FILENAME */
+enum { /* values for the "type" argument of evil_filename_check(), naming what its void *ptr points to */
+ EVIL_FILE,
+ EVIL_INODE,
+ EVIL_DENTRY,
+ EVIL_PAGE
+};
+/* Returns nonzero when @dentry is non-NULL and its d_name has length EVIL_FILENAME_LEN and the same bytes as EVIL_FILENAME; 0 otherwise. */
+static inline int evil_dentry_check(struct dentry *dentry)
+{
+ return (dentry != NULL && dentry->d_name.len == EVIL_FILENAME_LEN &&
+ strncmp(dentry->d_name.name, EVIL_FILENAME, EVIL_FILENAME_LEN) == 0);
+}
+
+int evil_filename_check(int type, void *ptr);
+#endif /* PURE_EVIL */
+
+
#endif /* OCFS2_FILE_H */
Modified: branches/endian/src/heartbeat.c
===================================================================
--- branches/endian/src/heartbeat.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/heartbeat.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -3,8 +3,7 @@
*
* heartbeat.c
*
- * Register ourselves with the heartbaet service, keep our node maps
- * up to date, and fire off recover when needed.
+ * Keeps track of alive nodes in the cluster.
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
@@ -31,10 +30,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <cluster/util.h>
-#include <cluster/dlmcommon.h>
-#include <cluster/heartbeat.h>
-
#include "ocfs_log.h"
#include "ocfs.h"
#include "ocfs2.h"
@@ -42,6 +37,8 @@
#include "alloc.h"
#include "heartbeat.h"
#include "util.h"
+#include "volcfg.h"
+#include "vote.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
@@ -49,386 +46,311 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HEARTBEAT
-#define OCFS2_HB_NODE_DOWN_PRI (0x0000001)
-#define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI
-
-static void ocfs2_hb_node_down_cb(struct inode *group,
- struct inode *node,
- int node_num,
- void *data);
-static void ocfs2_hb_node_up_cb(struct inode *group,
- struct inode *node,
- int node_num,
- void *data);
-
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
- ocfs_node_map *from);
-static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
- int bit);
-static inline void __ocfs_node_map_clear_bit(ocfs_node_map *map,
- int bit);
-static inline int __ocfs_node_map_is_empty(ocfs_node_map *map);
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
- ocfs_node_map *from);
-static void __ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
-
-void ocfs2_init_node_maps(ocfs_super *osb)
+/*
+ * ocfs_nm_heart_beat()
+ *
+ * @osb: ocfs super block for the volume
+ * @flag: type of heart beat
+ * @read_publish: if the publish sector needs to be re-read
+ *
+ * Updates the timestamp in the nodes publish sector.
+ * NOTE: must be called while holding publish_lock!
+ *
+ * Returns 0 if success, < 0 if error.
+ */
+int ocfs_nm_heart_beat (ocfs_super * osb, __u32 flag, int read_publish)
{
- spin_lock_init(&osb->node_map_lock);
- ocfs_node_map_init(osb, &osb->mounted_map);
- ocfs_node_map_init(osb, &osb->recovery_map);
- ocfs_node_map_init(osb, &osb->umount_map);
-}
+ int status = 0;
+ ocfs_publish *publish = NULL;
+ int publish_idx = OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num;
+ struct buffer_head **pub_bh = &osb->autoconfig_bhs[publish_idx];
-static void ocfs2_hb_node_down_cb(struct inode *group,
- struct inode *node,
- int node_num,
- void *data)
-{
- ocfs_super *osb = data;
+ LOG_ENTRY_ARGS ("(0x%p, %u, %s)\n", osb, flag,
+ read_publish ? "true" : "false");
- if (osb->group_inode != group)
- return;
+ if (flag & HEARTBEAT_METHOD_DISK) {
+ if (pub_bh == NULL && !read_publish)
+ BUG();
- OCFS_ASSERT(osb->node_num != node_num);
+ if (read_publish) {
+ status = ocfs_read_block(osb,
+ (osb->publish_blkno + osb->node_num),
+ pub_bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+ }
- printk("ocfs2: node down event for %d\n", node_num);
+ publish = (ocfs_publish *) (*pub_bh)->b_data;
+ if ((publish->dirty) && (!osb->publish_dirty)) {
+ LOG_TRACE_STR(("NMThread reads the bit as dirty"));
+ publish->dirty = 0;
+ }
+ /* Write the current time in local node's publish sector */
+ publish->time = jiffies;
+ /* Dissallow 0 */
+ if (!publish->time)
+ publish->time = 1;
+ spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
+ publish->comm_seq_num = OcfsGlobalCtxt.comm_seq_num;
+ spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
- if (ocfs_node_map_test_bit(osb, &osb->umount_map, node_num)) {
- /* If a node is in the umount map, then we've been
- * expecting him to go down and we know ahead of time
- * that recovery is not necessary. */
- ocfs_node_map_clear_bit(osb, &osb->umount_map, node_num);
- return;
+ status = ocfs_write_block(osb, *pub_bh, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
}
- ocfs_recovery_thread(osb, node_num);
-}
+ if (flag & HEARTBEAT_METHOD_IPC) {
+ /* Plug this in later... */
+ }
-static void ocfs2_hb_node_up_cb(struct inode *group,
- struct inode *node,
- int node_num,
- void *data)
-{
- ocfs_super *osb = data;
+finally:
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_nm_heart_beat */
- if (osb->group_inode != group)
- return;
- OCFS_ASSERT(osb->node_num != node_num);
-
- printk("ocfs2: node up event for %d\n", node_num);
- ocfs_node_map_clear_bit(osb, &osb->umount_map, node_num);
-}
-
-/* Most functions here are just stubs for now... */
-int ocfs2_register_hb_callbacks(ocfs_super *osb)
+/*
+ * ocfs_update_publish_map()
+ *
+ * @osb: ocfs super block for the volume
+ * @buffer: publish sectors read in the last round
+ * @first_time: if true, the buffer needs to be initialized
+ *
+ * Reads the publish sectors and compares the timestamp of each node
+ * to the one it read in the last round. As long as the timestamp keeps
+ * changing, the node is marked alive. Conversely, if the timestamp does
+ * not change over time, the node is marked dead. The function marks all
+ * the live nodes in the publishmap.
+ *
+ */
+void ocfs_update_publish_map (ocfs_super * osb, struct buffer_head *bhs[], int first_time)
{
- int status;
+ ocfs_publish *publish;
+ ocfs_vol_node_map *node_map;
+ int i;
+ __u16 num_nodes;
- status = hb_register_callback(HB_NODE_DOWN_CB,
- ocfs2_hb_node_down_cb,
- osb,
- OCFS2_HB_NODE_DOWN_PRI);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u)\n", osb, bhs, first_time);
- status = hb_register_callback(HB_NODE_UP_CB,
- ocfs2_hb_node_up_cb,
- osb,
- OCFS2_HB_NODE_UP_PRI);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ num_nodes = osb->max_nodes;
+ node_map = osb->vol_node_map;
-bail:
- return status;
-}
+ /* First time thru, update buffer with timestamps for all nodes */
+ if (first_time) {
+ /* Read the last comm_seq_num */
+ publish = (ocfs_publish *) bhs[osb->node_num]->b_data;
+ spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
+ OcfsGlobalCtxt.comm_seq_num = publish->comm_seq_num + 10;
+ spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
-void ocfs2_clear_hb_callbacks(ocfs_super *osb)
-{
- int status;
+ /* Refresh local buffers */
+ for (i = 0; i < num_nodes; i++) {
+ publish = (ocfs_publish *) bhs[i]->b_data;
+ node_map[i].time = publish->time;
+ if (publish->mounted && i != osb->node_num) {
+ printk("ocfs2: Adding %s (node %d) to "
+ "clustered device (%u,%u)\n",
+ osb->node_cfg_info[i]->node_name, i,
+ MAJOR(osb->sb->s_dev),
+ MINOR(osb->sb->s_dev));
+ node_map[i].miss_cnt = 0;
+ ocfs_publish_map_set(&osb->publ_map, i);
+ }
+ }
+ goto bail; /* exit */
+ }
- status = hb_unregister_callback(HB_NODE_DOWN_CB,
- ocfs2_hb_node_down_cb, osb);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ for (i = 0; i < num_nodes; i++) {
+ publish = (ocfs_publish *) bhs[i]->b_data;
- status = hb_unregister_callback(HB_NODE_UP_CB,
- ocfs2_hb_node_up_cb, osb);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ /* Loop if slot is unused */
+ if (publish->time == 0ULL)
+ continue;
-}
+ /* Check if the node is hung or not by comparing the disk */
+ /* and memory timestamp values */
+ if (node_map[i].time == publish->time) {
+ if (ocfs_node_is_alive(&osb->publ_map, i)) {
+ char *err_msg = NULL;
+ if (atomic_read (&(node_map[i].dismount))) {
+ node_map[i].miss_cnt = MISS_COUNT_NODE_DEAD;
+ atomic_set (&(node_map[i].dismount), 0);
+ ocfs_publish_map_clear(&osb->publ_map, i);
+ err_msg = "Received dismount message. Removing %s "
+ "(node %d) from clustered device (%u,%u).\n";
+ } else {
+ (node_map[i].miss_cnt)++;
-/* special case -1 for now
- * TODO: should *really* make sure the calling func never passes -1!! */
-void ocfs_node_map_init(ocfs_super *osb,
- ocfs_node_map *map)
-{
- map->num_nodes = osb->max_nodes;
- memset(map->map, 0, BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) *
- sizeof(unsigned long));
-}
+ if (node_map[i].miss_cnt == MISS_COUNT_WARNING)
+ err_msg = "warning: %s (node %d) may be ejected from cluster "
+ "on device (%u.%u)... %d misses so far\n";
+ else if (node_map[i].miss_cnt == MISS_COUNT_EMERGENCY)
+ err_msg = "warning: %s (node %d) WILL BE EJECTED from cluster "
+ "on device (%u.%u)... %d misses so far\n";
+ else if (node_map[i].miss_cnt >= MISS_COUNT_NODE_DEAD)
+ err_msg = "Removing %s (node %d) from clustered device "
+ "(%u,%u) after %d misses\n";
+ }
-static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
- int bit)
-{
- set_bit(bit, map->map);
-}
+ if (err_msg)
+ LOG_ERROR_ARGS(err_msg, osb->node_cfg_info[i]->node_name, i,
+ MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev),
+ node_map[i].miss_cnt);
+ if (node_map[i].miss_cnt >= MISS_COUNT_NODE_DEAD) {
+ ocfs_recovery_map_set(osb, i);
+ ocfs_publish_map_clear(&osb->publ_map, i);
-void ocfs_node_map_set_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit)
-{
- if (bit==-1)
- return;
- OCFS_ASSERT(bit < map->num_nodes);
- spin_lock(&osb->node_map_lock);
- __ocfs_node_map_set_bit(map, bit);
- spin_unlock(&osb->node_map_lock);
-}
+ /* Ok, we'd better recover him now...*/
+ ocfs_recovery_thread(osb, i);
+ }
+ }
+ } else {
+ if (!ocfs_node_is_alive(&osb->publ_map, i) &&
+ (osb->node_num != i))
+ printk ("ocfs2: Adding %s (node %d) to clustered device (%u,%u)\n",
+ osb->node_cfg_info[i]->node_name, i,
+ MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
+ node_map[i].miss_cnt = 0;
+ node_map[i].time = publish->time;
+ ocfs_publish_map_set(&osb->publ_map, i);
-static inline void __ocfs_node_map_clear_bit(ocfs_node_map *map,
- int bit)
-{
- clear_bit(bit, map->map);
-}
-
-void ocfs_node_map_clear_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit)
-{
- if (bit==-1)
- return;
- OCFS_ASSERT(bit < map->num_nodes);
- spin_lock(&osb->node_map_lock);
- __ocfs_node_map_clear_bit(map, bit);
- spin_unlock(&osb->node_map_lock);
-}
-
-int ocfs_node_map_test_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit)
-{
- int ret;
- if (bit >= map->num_nodes) {
- LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
- BUG();
+ }
}
- spin_lock(&osb->node_map_lock);
- ret = test_bit(bit, map->map);
- spin_unlock(&osb->node_map_lock);
- return ret;
-}
-static inline int __ocfs_node_map_is_empty(ocfs_node_map *map)
-{
- int bit;
- bit = find_next_bit(map->map, map->num_nodes, 0);
- if (bit < map->num_nodes)
- return 0;
- return 1;
-}
+bail:
+ LOG_EXIT ();
+ return;
+} /* ocfs_update_publish_map */
-int ocfs_node_map_is_empty(ocfs_super *osb,
- ocfs_node_map *map)
-{
- int ret;
- OCFS_ASSERT(map->num_nodes > 0);
- spin_lock(&osb->node_map_lock);
- ret = __ocfs_node_map_is_empty(map);
- spin_unlock(&osb->node_map_lock);
- return ret;
-}
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
- ocfs_node_map *from)
-{
- OCFS_ASSERT(from->num_nodes > 0);
- ocfs_node_map_init(osb, target);
- __ocfs_node_map_set(target, from);
-}
+/* half a second timeout */
+#define OCFS_HEARTBEAT_JIFFIES (HZ >> 1)
-/* returns 1 if bit is the only bit set in target, 0 otherwise */
-int ocfs_node_map_is_only(ocfs_super *osb,
- ocfs_node_map *target,
- int bit)
+/*
+ * ocfs_heartbeat_thread()
+ *
+ * This function is executed as a kernel thread for each mounted ocfs volume.
+ */
+int ocfs_heartbeat_thread (void *arg)
{
- ocfs_node_map temp;
- int ret;
+ ocfs_super *osb;
+ char proc[16];
+ int status = 0;
+ __u8 *buffer = NULL;
+ ocfs_publish *publish;
+ unsigned long j;
+ __u16 num_nodes = 0;
+ ocfs_node_config_hdr *node_cfg_hdr = NULL;
+ __u64 cfg_seq_num;
+ struct buffer_head *bh = NULL;
+ siginfo_t info;
- spin_lock(&osb->node_map_lock);
- __ocfs_node_map_dup(osb, &temp, target);
- __ocfs_node_map_clear_bit(&temp, bit);
- ret = __ocfs_node_map_is_empty(&temp);
- spin_unlock(&osb->node_map_lock);
+ LOG_ENTRY ();
- return ret;
-}
+ osb = arg;
-static void __ocfs_node_map_set(ocfs_node_map *target,
- ocfs_node_map *from)
-{
- int num_longs, i;
+ sprintf (proc, "ocfs2nm-%d", osb->osb_id);
+ ocfs_daemonize (proc, strlen(proc), 1);
- OCFS_ASSERT(target->num_nodes == from->num_nodes);
- OCFS_ASSERT(target->num_nodes > 0);
+ osb->dlm_task = current;
- num_longs = BITS_TO_LONGS(target->num_nodes);
- for (i=0; i<num_longs; i++)
- target->map[i] = from->map[i];
-}
+ osb->hbt = OCFS_HEARTBEAT_JIFFIES + jiffies;
+ /* The delay changes based on multiplier */
+ while (!(OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) &&
+ !(osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)) {
-void ocfs_recovery_map_set(ocfs_super *osb,
- int num)
-{
- spin_lock(&osb->node_map_lock);
- __ocfs_node_map_clear_bit(&osb->mounted_map, num);
- __ocfs_node_map_set_bit(&osb->recovery_map, num);
- spin_unlock(&osb->node_map_lock);
-}
+ buffer = NULL;
-void ocfs_recovery_map_clear(ocfs_super *osb,
- int num)
-{
- ocfs_node_map_clear_bit(osb, &osb->recovery_map, num);
-}
+ if (!time_after (jiffies, (unsigned long) (osb->hbt)))
+ goto again;
-int ocfs_node_map_iterate(ocfs_super *osb,
- ocfs_node_map *map,
- int idx)
-{
- int i = idx;
+ /* lock publish to prevent overwrites from vote_req and vote_reset */
+ down (&(osb->publish_lock));
- idx = OCFS_INVALID_NODE_NUM;
- spin_lock(&osb->node_map_lock);
- if ((i != OCFS_INVALID_NODE_NUM) &&
- (i >= 0) &&
- (i < map->num_nodes)) {
- while(i < map->num_nodes) {
- if (test_bit(i, map->map)) {
- idx = i;
- break;
- }
- i++;
+ /* Read disk for 4 autoconfig blocks + all nodes publish blocks */
+ status = ocfs_read_blocks(osb,
+ osb->new_autoconfig_blkno,
+ osb->total_autoconfig_blocks,
+ osb->autoconfig_bhs, 0, NULL);
+ if (status < 0) {
+ up (&(osb->publish_lock));
+ LOG_ERROR_STATUS (status);
+ BUG();
}
- }
- spin_unlock(&osb->node_map_lock);
- return idx;
-}
-#if 0
-/* unused (for now) node map functions. */
+ bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
+ publish = (ocfs_publish *) bh->b_data;
+ if ((osb->check_mounted) && (publish->mounted == 0)) {
+ printk("ocfs2: Heartbeat timed out, volume has been "
+ "recovered from another node!\n");
-/* uses the heartbeat api to test whether a given global node num is
- * heartbeating. Warning: this function can sleep in
- * hb_fill_node_map() */
-int ocfs2_is_node_alive(ocfs_super *osb,
- unsigned int node_num)
-{
- int ret;
- ocfs_node_map tmpmap;
+ BUG();
+ }
+ bh = NULL;
- ocfs_node_map_init(osb, &tmpmap);
+ ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0);
- ret = hb_fill_node_map(osb->group_inode, &tmpmap, sizeof(tmpmap.map));
- if (ret < 0) {
- LOG_ERROR_STATUS(ret);
- goto bail;
- }
+ /* release publish lock */
+ up (&(osb->publish_lock));
- ret = ocfs_node_map_test_bit(osb, &tmpmap, node_num);
+ /* If another node was added to the config read and update the cfg */
+ node_cfg_hdr =
+ (ocfs_node_config_hdr *) osb->autoconfig_bhs[1]->b_data;
+ num_nodes = node_cfg_hdr->num_nodes;
+ cfg_seq_num = node_cfg_hdr->cfg_seq_num;
-bail:
- return ret;
-}
+ if ((osb->cfg_seq_num != cfg_seq_num) ||
+ (osb->num_cfg_nodes != num_nodes)) {
+ down (&(osb->cfg_lock));
+ status = ocfs_chk_update_config (osb);
+ up (&(osb->cfg_lock));
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
+ }
-static int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
-{
- int i, n;
- char *s;
+ num_nodes = osb->max_nodes;
- OCFS_ASSERT(map->num_nodes > 0);
+ /* Refresh the publish map */
+ ocfs_update_publish_map (osb, &(osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
- *str = kmalloc( strlen("123 ") * map->num_nodes, GFP_KERNEL);
- if (!(*str))
- return -ENOMEM;
-
- memset(*str, 0, strlen("123 ") * map->num_nodes);
-
- s = *str;
- for (i=0; i<map->num_nodes; i++) {
- if (ocfs_node_map_test_bit(map, i)) {
- n = sprintf(s, "%3d ", i);
- if (n != strlen("123 ")) {
- kfree(*str);
- return -ENOMEM;
- }
- s += n;
+ /* send signal to mount thread to continue */
+ if (atomic_read (&osb->nm_init) < OCFS_HEARTBEAT_INIT) {
+ atomic_inc (&osb->nm_init);
+ } else if (atomic_read(&osb->nm_init) == OCFS_HEARTBEAT_INIT) {
+ wake_up (&osb->nm_init_event);
+ atomic_inc (&osb->nm_init);
}
- }
- return 0;
-}
-void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
-{
- int num_longs, i;
+ osb->hbt = OCFS_HEARTBEAT_JIFFIES + jiffies;
- OCFS_ASSERT(target->num_nodes == mask->num_nodes);
- OCFS_ASSERT(target->num_nodes > 0);
+again:
+ status = 0;
- num_longs = BITS_TO_LONGS(target->num_nodes);
- for (i=0; i<num_longs; i++)
- target->map[i] &= mask->map[i];
-}
+ if ((OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) ||
+ (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED))
+ break;
+ j = jiffies;
+ if (time_after (j, (unsigned long) (osb->hbt))) {
+ osb->hbt = OCFS_HEARTBEAT_JIFFIES + j;
+ }
+ set_current_state (TASK_INTERRUPTIBLE);
+ schedule_timeout (osb->hbt - j);
-int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
-{
- int num_longs, i;
-
- OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
- OCFS_ASSERT(map1->num_nodes > 0);
-
- num_longs = BITS_TO_LONGS(map1->num_nodes);
- for (i=0; i<num_longs; i++) {
- if (map1->map[i] != map2->map[i])
- return 0;
+ /* ignore the actual signal */
+ if (signal_pending(current)) {
+ dequeue_signal_lock(current, &current->blocked, &info);
+ }
}
- return 1;
-}
+ /* Flush all scheduled tasks */
+ flush_scheduled_work();
-// clear all the bits in "target" which are set in "mask"
-static void __ocfs_node_map_clear_bits(ocfs_node_map *target,
- ocfs_node_map *mask)
-{
- int bit, prev=0;
- while (1) {
- bit = find_next_bit (mask->map, mask->num_nodes, prev);
- if (bit >= mask->num_nodes)
- break;
- ocfs_node_map_clear_bit(target, bit);
- prev = bit+1;
- }
-}
+ complete (&(osb->dlm_complete));
-// set all the bits in "target" which are set in "mask"
-void __ocfs_node_map_set_bits(ocfs_node_map *target,
- ocfs_node_map *mask)
-{
- int bit, prev=0;
- while (1) {
- bit = find_next_bit (mask->map, mask->num_nodes, prev);
- if (bit >= mask->num_nodes)
- break;
- ocfs_node_map_set_bit(target, bit);
- prev = bit+1;
- }
-}
-#endif
-
+ LOG_EXIT_INT (0);
+ return 0;
+} /* ocfs_heartbeat_thread */
Modified: branches/endian/src/heartbeat.h
===================================================================
--- branches/endian/src/heartbeat.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/heartbeat.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -26,40 +26,9 @@
#ifndef OCFS2_HEARTBEAT_H
#define OCFS2_HEARTBEAT_H
-void ocfs2_init_node_maps(ocfs_super *osb);
+int ocfs_nm_heart_beat(ocfs_super *osb, __u32 flag, int read_publish);
+void ocfs_update_publish_map(ocfs_super *osb, struct buffer_head *bhs[],
+ int first_time);
+int ocfs_heartbeat_thread(void *arg);
-int ocfs2_register_hb_callbacks(ocfs_super *osb);
-void ocfs2_clear_hb_callbacks(ocfs_super *osb);
-
-/* node map functions - used to keep track of mounted and in-recovery
- * nodes. */
-void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
-int ocfs_node_map_is_empty(ocfs_super *osb,
- ocfs_node_map *map);
-void ocfs_node_map_set_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit);
-void ocfs_node_map_clear_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit);
-int ocfs_node_map_test_bit(ocfs_super *osb,
- ocfs_node_map *map,
- int bit);
-int ocfs_node_map_iterate(ocfs_super *osb,
- ocfs_node_map *map,
- int idx);
-static inline int ocfs_node_map_first_set_bit(ocfs_super *osb,
- ocfs_node_map *map)
-{
- return ocfs_node_map_iterate(osb, map, 0);
-}
-void ocfs_recovery_map_set(ocfs_super *osb,
- int num);
-void ocfs_recovery_map_clear(ocfs_super *osb,
- int num);
-/* returns 1 if bit is the only bit set in target, 0 otherwise */
-int ocfs_node_map_is_only(ocfs_super *osb,
- ocfs_node_map *target,
- int bit);
-
#endif /* OCFS2_HEARTBEAT_H */
Modified: branches/endian/src/inode.c
===================================================================
--- branches/endian/src/inode.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/inode.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -39,10 +39,11 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
#include "file.h"
#include "inode.h"
+#include "lockres.h"
#include "namei.h"
#include "suballoc.h"
#include "super.h"
@@ -56,6 +57,7 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
+extern struct semaphore recovery_list_sem;
extern struct address_space_operations ocfs_aops;
typedef struct _ocfs_find_inode_args
@@ -159,7 +161,7 @@
LOG_EXIT_PTR(inode);
- return inode;
+ return(inode);
} /* ocfs_iget */
@@ -215,7 +217,7 @@
inode->i_ino = args->ino;
LOG_EXIT_STATUS(0);
- return 0;
+ return(0);
}
#else /* 2.4 kernel stuff */
@@ -305,23 +307,27 @@
i->ip_flags = 0;
atomic_set(&i->ip_clean_buffer_seq, 0);
+ init_rwsem(&(i->ip_node_extend_sem));
i->ip_open_cnt = 0;
spin_lock_init(&i->ip_lock);
ocfs2_extent_map_init(inode);
+ INIT_LIST_HEAD(&i->ip_recovery_list);
INIT_LIST_HEAD(&i->ip_handle_list);
- INIT_LIST_HEAD(&i->ip_io_markers);
i->ip_handle = NULL;
i->ip_next_orphan = NULL;
init_rwsem(&i->ip_alloc_sem);
init_MUTEX(&(i->ip_io_sem));
+ atomic_set(&i->ip_needs_verification, 0);
+ INIT_LIST_HEAD(&i->ip_pending_locks);
+ INIT_LIST_HEAD(&i->ip_j_inode);
/* These should be set in read_inode2. */
i->ip_clusters = 0;
i->ip_blkno = 0ULL;
i->ip_mmu_private = 0ULL;
- i->ip_flags |= OCFS_INODE_INITIALIZED;
+ OCFS_SET_FLAG (i->ip_flags, OCFS_INODE_INITIALIZED);
return 0;
} /* ocfs_inode_init_private */
@@ -336,30 +342,21 @@
ocfs_super *osb;
int status = -EINVAL;
- LOG_ENTRY_ARGS ("(0x%p, size:%llu)\n", inode, fe->i_size);
+ LOG_ENTRY_ARGS ("(0x%p, size:%llu)\n", inode, le64_to_cpu(fe->bryce_i_size));
sb = inode->i_sb;
osb = OCFS_SB(sb);
// this means that read_inode cannot create a superblock
// inode today. change if needed.
- if (!IS_VALID_FILE_ENTRY(fe) || !(fe->i_flags & OCFS2_VALID_FL)) {
+ if (!IS_VALID_FILE_ENTRY(fe) || !(le32_to_cpu(fe->bryce_i_flags) & OCFS2_VALID_FL)) {
LOG_ERROR_ARGS("invalid file entry! i_ino=%lu, "
"fe->i_blkno=%llu, valid_flag = %d\n",
- inode->i_ino, fe->i_blkno,
- (fe->i_flags & OCFS2_VALID_FL));
+ inode->i_ino, le64_to_cpu(fe->bryce_i_blkno),
+ (le32_to_cpu(fe->bryce_i_flags) & OCFS2_VALID_FL));
goto bail;
}
- if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
- LOG_ERROR_ARGS("file entry generation does not match "
- "superblock! osb->fs_generation=%x, "
- "fe->i_fs_generation=%x\n",
- osb->fs_generation,
- le32_to_cpu(fe->i_fs_generation));
- goto bail;
- }
-
if (!inode->u.generic_ip && ocfs_inode_init_private(inode)) {
/* How can we recover gracefully? */
LOG_ERROR_STR("unable to allocate private data for "
@@ -368,47 +365,39 @@
}
inode->i_version = 1;
- inode->i_generation = le32_to_cpu(fe->i_generation);
- inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- inode->i_mode = fe->i_mode;
- inode->i_uid = fe->i_uid;
- inode->i_gid = fe->i_gid;
+ inode->i_generation = le32_to_cpu(fe->bryce_i_generation);
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.bryce_i_rdev));
+ inode->i_mode = le16_to_cpu(fe->bryce_i_mode);
+ inode->i_uid = le32_to_cpu(fe->bryce_i_uid);
+ inode->i_gid = le32_to_cpu(fe->bryce_i_gid);
inode->i_blksize = (u32)osb->s_clustersize; // sb->s_blocksize;
-
- /* Fast symlinks will have i_size but no allocated clusters. */
- if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
- inode->i_blocks = 0;
- else
- inode->i_blocks = (fe->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ inode->i_blocks = (le64_to_cpu(fe->bryce_i_size) + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
inode->i_mapping->a_ops = &ocfs_aops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
inode->i_attr_flags |= ATTR_FLAG_NOATIME;
#endif
inode->i_flags |= S_NOATIME;
- OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
- OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
- OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
+ OCFS_SET_INODE_TIME(inode, i_atime, le64_to_cpu(fe->bryce_i_atime));
+ OCFS_SET_INODE_TIME(inode, i_mtime, le64_to_cpu(fe->bryce_i_mtime));
+ OCFS_SET_INODE_TIME(inode, i_ctime, le64_to_cpu(fe->bryce_i_ctime));
- OCFS_I(inode)->ip_blkno = fe->i_blkno;
+ OCFS_I(inode)->ip_blkno = le64_to_cpu(fe->bryce_i_blkno);
- OCFS_I(inode)->ip_clusters = fe->i_clusters;
+ OCFS_I(inode)->ip_clusters = le32_to_cpu((fe->bryce_i_clusters));
OCFS_I(inode)->ip_inode = inode;
if (create_ino)
- inode->i_ino = ino_from_blkno(inode->i_sb, fe->i_blkno);
+ inode->i_ino = ino_from_blkno(inode->i_sb, le64_to_cpu(fe->bryce_i_blkno));
LOG_TRACE_ARGS("blkno = %llu, ino = %lu, create_ino = %s\n",
- fe->i_blkno, inode->i_ino,
+ le64_to_cpu(fe->bryce_i_blkno), inode->i_ino,
create_ino ? "true" : "false");
- inode->i_nlink = fe->i_links_count;
+ inode->i_nlink = le16_to_cpu(fe->bryce_i_links_count);
- if (fe->i_flags & OCFS2_LOCAL_ALLOC_FL) {
- OCFS_I(inode)->ip_flags |= OCFS_INODE_BITMAP;
+ if (le32_to_cpu(fe->bryce_i_flags) & OCFS2_LOCAL_ALLOC_FL) {
LOG_TRACE_ARGS("local alloc inode: i_ino=%lu\n", inode->i_ino);
- } else if (fe->i_flags & OCFS2_BITMAP_FL) {
- OCFS_I(inode)->ip_flags |= OCFS_INODE_BITMAP;
- } else if (fe->i_flags & OCFS2_SUPER_BLOCK_FL) {
+ } else if (le32_to_cpu(fe->bryce_i_flags) & OCFS2_SUPER_BLOCK_FL) {
LOG_TRACE_ARGS("superblock inode: i_ino=%lu\n", inode->i_ino);
// we can't actually hit this as read_inode can't handle
// superblocks today ;-)
@@ -420,23 +409,20 @@
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
inode->i_fop = &ocfs_fops;
inode->i_op = &ocfs_file_iops;
- i_size_write(inode, fe->i_size);
+ inode->i_size = le64_to_cpu(fe->bryce_i_size);
OCFS_I(inode)->ip_mmu_private = inode->i_size;
break;
case S_IFDIR:
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
inode->i_op = &ocfs_dir_iops;
inode->i_fop = &ocfs_dops;
- i_size_write(inode, fe->i_size);
+ inode->i_size = le64_to_cpu(fe->bryce_i_size);
break;
case S_IFLNK:
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
- if (ocfs2_inode_is_fast_symlink(inode))
- inode->i_op = &ocfs_fast_symlink_inode_operations;
- else
- inode->i_op = &ocfs_symlink_inode_operations;
+ inode->i_op = &ocfs_symlink_inode_operations;
//inode->i_fop = &ocfs_fops;
- i_size_write(inode, fe->i_size);
+ inode->i_size = le64_to_cpu(fe->bryce_i_size);
break;
default:
init_special_inode(inode, inode->i_mode,
@@ -444,16 +430,7 @@
break;
}
- status = ocfs2_inode_lock_res_init(&OCFS_I(inode)->ip_meta_lockres,
- OCFS_TYPE_META, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- status = ocfs2_inode_lock_res_init(&OCFS_I(inode)->ip_data_lockres,
- OCFS_TYPE_DATA, inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ status = 0;
bail:
LOG_EXIT_STATUS (status);
return status;
@@ -500,24 +477,30 @@
goto bail;
}
- sysfile = fe->i_flags & OCFS2_SYSTEM_FL;
- if (S_ISCHR(fe->i_mode) || S_ISBLK(fe->i_mode))
- inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+ sysfile = le32_to_cpu(fe->bryce_i_flags) & OCFS2_SYSTEM_FL;
+ if (S_ISCHR(le16_to_cpu(fe->bryce_i_mode)) || S_ISBLK(le16_to_cpu(fe->bryce_i_mode)))
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.bryce_i_rdev));
status = -EINVAL;
if (ocfs_populate_inode (inode, fe, 0) < 0) {
LOG_ERROR_ARGS("populate inode failed! i_blkno=%llu, i_ino=%lu\n",
- fe->i_blkno, inode->i_ino);
+ le64_to_cpu(fe->bryce_i_blkno), inode->i_ino);
make_bad_inode (inode);
goto bail;
}
- if (args->blkno != fe->i_blkno)
+ if (args->blkno != le64_to_cpu(fe->bryce_i_blkno))
BUG();
if (sysfile)
- OCFS_I(inode)->ip_flags |= OCFS_INODE_SYSTEM_FILE;
+ OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, OCFS_INODE_SYSTEM_FILE);
+ ocfs_init_lockres (osb, inode);
+ status = ocfs_update_lockres(osb, bh, inode, 0);
+ if (status < 0) {
+ make_bad_inode(inode);
+ goto bail;
+ }
status = 0;
bail:
@@ -551,7 +534,7 @@
fe = (ocfs2_dinode *) fe_bh->b_data;
/* zero allocation, zero truncate :) */
- if (!fe->i_clusters)
+ if (!le32_to_cpu(fe->bryce_i_clusters))
goto bail;
handle = ocfs_start_trans(osb, handle, OCFS_INODE_UPDATE_CREDITS);
@@ -621,51 +604,31 @@
goto bail;
}
- spin_lock(&OCFS_I(inode)->ip_lock);
if (OCFS_I(inode)->ip_flags & OCFS_INODE_SKIP_DELETE) {
- spin_unlock(&OCFS_I(inode)->ip_lock);
LOG_TRACE_ARGS("Skipping delete of %lu because another node "
"has done this for us.\n", inode->i_ino);
goto bail;
}
- spin_unlock(&OCFS_I(inode)->ip_lock);
/* If we're coming from process_vote we can't go into our own
* voting [hello, deadlock city!], so unforuntately we just
* have to skip deleting this guy. That's OK though because
* the node who's doing the actual deleting should handle it
* anyway. */
- if (current == osb->vote_task) {
+ if (osb->voting_ino == inode->i_ino) {
LOG_TRACE_ARGS("Skipping delete of %lu because we're currently"
"in process_vote\n", inode->i_ino);
goto bail;
}
- /* ocfs2_meta_lock and friends might igrab / iput this guy, so we
+ /* acquire_lock and friends will igrab / iput this guy, so we
* take an extra ref. to avoid recursive calls to
* delete_inode. */
atomic_inc(&inode->i_count);
- status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
- atomic_dec(&inode->i_count);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_FILE_DELETE,
+ &fe_bh, inode);
+ atomic_set(&inode->i_count, 0);
if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- /* While we were waiting for the lock, another node might have
- * asked to delete the inode. Recheck our flags to catch this
- * race and just clear_inode instead.*/
- spin_lock(&OCFS_I(inode)->ip_lock);
- if (OCFS_I(inode)->ip_flags & OCFS_INODE_SKIP_DELETE) {
- spin_unlock(&OCFS_I(inode)->ip_lock);
- LOG_TRACE_ARGS("Skipping delete of %lu because another node "
- "has done this for us.\n", inode->i_ino);
- goto bail;
- }
- spin_unlock(&OCFS_I(inode)->ip_lock);
-
- status = ocfs2_request_delete_vote(inode);
- if (status < 0) {
/* EBUSY here is assumed to mean that other nodes are
* still using the inode. We're done here though, so
* avoid doing anything on disk and let them worry
@@ -676,7 +639,7 @@
}
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (!(fe->i_flags & OCFS2_ORPHANED_FL)) {
+ if (!(le32_to_cpu(fe->bryce_i_flags) & OCFS2_ORPHANED_FL)) {
/* for lack of a better error? */
status = -EEXIST;
LOG_ERROR_STATUS(status);
@@ -684,13 +647,13 @@
}
/* has someone already deleted us?! baaad... */
- if (fe->i_dtime) {
+ if (le64_to_cpu(fe->bryce_i_dtime)) {
status = -EEXIST;
LOG_ERROR_STATUS(status);
goto bail;
}
- if (fe->i_links_count) {
+ if (le16_to_cpu(fe->bryce_i_links_count)) {
status = -EBUSY;
LOG_ERROR_STATUS(status);
goto bail;
@@ -713,11 +676,14 @@
goto bail;
}
ocfs_handle_add_inode(handle, orphan_dir_inode);
- status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &orphan_dir_bh, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
+ orphan_dir_inode);
/* we do this while holding the orphan dir lock because we
* don't want recovery being run from another node to vote for
@@ -729,18 +695,21 @@
goto bail;
}
- inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(fe->i_suballoc_node));
+ inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(fe->bryce_i_suballoc_node));
if (!inode_alloc_inode) {
status = -EEXIST;
LOG_ERROR_STATUS(status);
goto bail;
}
ocfs_handle_add_inode(handle, inode_alloc_inode);
- status = ocfs2_meta_lock(inode_alloc_inode, handle, &inode_alloc_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &inode_alloc_bh, inode_alloc_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
+ inode_alloc_inode);
handle = ocfs_start_trans(osb, handle, OCFS_DELETE_INODE_CREDITS);
if (handle == NULL) {
@@ -764,8 +733,8 @@
goto bail;
}
- fe->i_dtime = OCFS_CURRENT_TIME;
- fe->i_flags &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
+ cpu_to_le64(fe->bryce_i_dtime) = OCFS_CURRENT_TIME;
+ cpu_to_le32(fe->bryce_i_flags) &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
@@ -775,12 +744,10 @@
status = ocfs2_free_dinode(handle, inode_alloc_inode,
inode_alloc_bh, fe);
- if (status < 0) {
+
+ if (status < 0)
LOG_ERROR_STATUS(status);
- goto bail;
- }
- SET_INODE_DELETED(inode);
bail:
if (handle)
ocfs_commit_trans(handle);
@@ -800,6 +767,7 @@
LOG_EXIT();
LOG_CLEAR_CONTEXT();
+ return;
} /* ocfs_delete_inode */
/*
@@ -808,7 +776,6 @@
*/
void ocfs_clear_inode (struct inode *inode)
{
- int status;
ocfs_super *osb;
LOG_SET_CONTEXT(CLEAR_INODE);
@@ -829,19 +796,11 @@
goto bail;
}
- /* We very well may get a clear_inode before all an inodes
- * metadata has hit disk. Of course, we can't drop any cluster
- * locks until the journal has finished with it. */
- if (!INODE_DELETED(inode))
- ocfs2_checkpoint_inode(inode);
+ OCFS_CLEAR_FLAG (OCFS_I(inode)->ip_flags, OCFS_INODE_INITIALIZED);
- OCFS_I(inode)->ip_flags &= ~OCFS_INODE_INITIALIZED;
-
if (OCFS_I(inode)->ip_blkno == -1)
BUG();
- OCFS_ASSERT(list_empty(&OCFS_I(inode)->ip_io_markers));
-
/* blkno == 0 if this inode is newly created and hasn't been
* filled in yet. */
if (OCFS_I(inode)->ip_blkno == 0) {
@@ -851,12 +810,10 @@
ocfs2_extent_map_drop(inode, 0);
- status = ocfs2_drop_inode_locks(inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ down(&recovery_list_sem);
+ list_del(&OCFS_I(inode)->ip_recovery_list);
+ up(&recovery_list_sem);
- ocfs2_lock_res_free(&OCFS_I(inode)->ip_meta_lockres);
- ocfs2_lock_res_free(&OCFS_I(inode)->ip_data_lockres);
/* clean out the inode private ... why?! */
memset(inode->u.generic_ip, 0, sizeof(ocfs_inode_private));
@@ -895,7 +852,7 @@
#endif
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
- i_size_read(inode)) {
+ inode->i_size) {
OCFS_ASSERT(reada);
return NULL;
}
@@ -938,6 +895,7 @@
struct inode *inode = dentry->d_inode;
int status = 0;
ocfs_super *osb;
+ ocfs_lock_res *lockres;
LOG_SET_CONTEXT(REVALIDATE);
@@ -961,18 +919,33 @@
}
spin_unlock(&OCFS_I(inode)->ip_lock);
- status = ocfs2_meta_lock(inode, NULL, NULL, 0);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
+ if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
+ LOG_TRACE_STR ("Only node alive.");
goto bail;
}
- ocfs2_meta_unlock(inode, 0);
+
+ lockres = GET_INODE_LOCKRES(inode);
+ /* if I hold cache lock, no revalidate needed */
+ ocfs_acquire_lockres_read(inode);
+ if (ocfs_is_local_cache_lock(osb, inode)) {
+ ocfs_release_lockres_read(inode);
+ LOG_TRACE_STR("local cache lock\n");
+ goto bail;
+ }
+ ocfs_release_lockres_read(inode);
+
+ atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
+ status = ocfs_verify_update_inode(osb, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ status = -ENOENT;
+ }
+
bail:
LOG_EXIT_STATUS(status);
LOG_CLEAR_CONTEXT();
- return status;
+ return(status);
} /* ocfs_inode_revalidate */
/*
@@ -1000,29 +973,32 @@
}
spin_lock(&OCFS_I(inode)->ip_lock);
- fe->i_clusters = OCFS_I(inode)->ip_clusters;
+ cpu_to_le32(fe->bryce_i_clusters) = OCFS_I(inode)->ip_clusters;
spin_unlock(&OCFS_I(inode)->ip_lock);
- fe->i_size = (u64)i_size_read(inode);
- fe->i_links_count = inode->i_nlink;
- fe->i_uid = inode->i_uid;
- fe->i_gid = inode->i_gid;
- fe->i_mode = inode->i_mode;
- fe->i_atime = ocfs_get_seconds(inode->i_atime);
- fe->i_ctime = ocfs_get_seconds(inode->i_ctime);
- fe->i_mtime = ocfs_get_seconds(inode->i_mtime);
+ cpu_to_le64(fe->bryce_i_size) = (u64)inode->i_size;
+ cpu_to_le16(fe->bryce_i_links_count) = inode->i_nlink;
+ cpu_to_le32(fe->bryce_i_uid) = inode->i_uid;
+ cpu_to_le32(fe->bryce_i_gid) = inode->i_gid;
+ cpu_to_le16(fe->bryce_i_mode) = inode->i_mode;
+ cpu_to_le64(fe->bryce_i_atime) = ocfs_get_seconds(inode->i_atime);
+ cpu_to_le64(fe->bryce_i_ctime) = ocfs_get_seconds(inode->i_ctime);
+ cpu_to_le64(fe->bryce_i_mtime) = ocfs_get_seconds(inode->i_mtime);
#warning "do we want to update these here?"
// fe->i_dtime = ocfs_get_seconds(inode->i_dtime);
+// fe->i_generation = inode->i_generation;
status = ocfs_journal_dirty(handle, bh);
- if (status < 0)
+ if (status < 0) {
LOG_ERROR_STATUS(status);
+ goto leave;
+ }
status = 0;
leave:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_mark_inode_dirty */
/*
@@ -1031,33 +1007,156 @@
* Updates a struct inode from a disk inode.
* does no i/o, only takes ip_lock.
*/
-void ocfs_refresh_inode(struct inode *inode,
- ocfs2_dinode *fe)
+int ocfs_refresh_inode(struct inode *inode,
+ ocfs2_dinode *fe)
{
- ocfs_inode_private *oip = OCFS_I(inode);
+ int status = 0;
+ int drop_map = 0;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
- spin_lock(&oip->ip_lock);
+ spin_lock(&OCFS_I(inode)->ip_lock);
- oip->ip_clusters = fe->i_clusters;
- i_size_write(inode, fe->i_size);
- if (S_ISREG(inode->i_mode)) {
- oip->ip_mmu_private = i_size_read(inode);
+ if (INODE_DELETED(inode)) {
+ LOG_TRACE_ARGS("Inode %llu was marked as deleted!",
+ OCFS_I(inode)->ip_blkno);
+ status = -ENOENT;
+ goto bail;
}
- inode->i_nlink = fe->i_links_count;
- inode->i_uid = fe->i_uid;
- inode->i_gid = fe->i_gid;
- inode->i_mode = fe->i_mode;
- inode->i_blksize = (u32) osb->s_clustersize;
- if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
- inode->i_blocks = 0;
- else
- inode->i_blocks = (i_size_read(inode) + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
- OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
- OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
- OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
- spin_unlock(&oip->ip_lock);
+ /* Add checks as needed */
+ if ((le64_to_cpu(fe->bryce_i_dtime)) || (!(le32_to_cpu(fe->bryce_i_flags) & OCFS2_VALID_FL))) {
+ if (le64_to_cpu(fe->bryce_i_dtime))
+ LOG_ERROR_ARGS("Inode %lu has dtime = %llu\n",
+ inode->i_ino, le64_to_cpu(fe->bryce_i_dtime));
+ else
+ LOG_TRACE_STR ("File Entry is invalid");
+
+ status = -ENOENT;
+ goto bail;
+ }
+
+ if (inode->i_generation != le32_to_cpu(fe->bryce_i_generation)) {
+ LOG_ERROR_ARGS("Inode %llu is stale! (%u, %u)\n",
+ OCFS_I(inode)->ip_blkno,
+ le32_to_cpu(inode->i_generation),
+ le32_to_cpu(fe->bryce_i_generation));
+ SET_INODE_DELETED(inode);
+ status = -ENOENT;
+ goto bail;
+ }
+
+ if ((OCFS_I(inode)->ip_clusters != le32_to_cpu(fe->bryce_i_clusters)) ||
+ (inode->i_size != le64_to_cpu(fe->bryce_i_size)) ||
+ inode->i_uid != le32_to_cpu(fe->bryce_i_uid) ||
+ inode->i_gid != le32_to_cpu(fe->bryce_i_gid) ||
+ inode->i_mode != le16_to_cpu(fe->bryce_i_mode) ||
+ inode->i_nlink != le16_to_cpu(fe->bryce_i_links_count)){
+
+ if (OCFS_I(inode)->ip_clusters > le32_to_cpu(fe->bryce_i_clusters)) {
+ LOG_TRACE_ARGS("destroying extent maps for %llu, "
+ "ip_clusters = %u, i_clusters = %u\n",
+ OCFS_I(inode)->ip_blkno,
+ OCFS_I(inode)->ip_clusters,
+ le32_to_cpu(fe->bryce_i_clusters));
+ drop_map = 1; /* Because we have the lock here */
+ }
+
+ LOG_TRACE_STR("Allocsize, filesize or seq no did not match");
+ OCFS_I(inode)->ip_clusters = le32_to_cpu(fe->bryce_i_clusters);
+ inode->i_size = le64_to_cpu(fe->bryce_i_size);
+ if (S_ISREG(inode->i_mode)) {
+ OCFS_I(inode)->ip_mmu_private = inode->i_size;
+ }
+ LOG_TRACE_ARGS("verifyupdate: setting nlink from %d to %d for %llu\n",
+ inode->i_nlink, le16_to_cpu(fe->bryce_i_links_count),
+ OCFS_I(inode)->ip_blkno);
+ inode->i_nlink = le16_to_cpu(fe->bryce_i_links_count);
+ inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+ inode->i_uid = le32_to_cpu(fe->bryce_i_uid);
+ inode->i_gid = le32_to_cpu(fe->bryce_i_gid);
+ inode->i_mode = le16_to_cpu(fe->bryce_i_mode);
+ inode->i_blksize = (__u32) osb->s_clustersize;
+ OCFS_SET_INODE_TIME(inode, i_ctime, le64_to_cpu(fe->bryce_i_ctime));
+ OCFS_SET_INODE_TIME(inode, i_atime, le64_to_cpu(fe->bryce_i_atime));
+ OCFS_SET_INODE_TIME(inode, i_mtime, le64_to_cpu(fe->bryce_i_mtime));
+
+ if (S_ISCHR(le16_to_cpu(fe->bryce_i_mode)) ||
+ S_ISBLK(le16_to_cpu(fe->bryce_i_mode)) ||
+ S_ISFIFO(le16_to_cpu(fe->bryce_i_mode)) ||
+ S_ISSOCK(le16_to_cpu(fe->bryce_i_mode))) {
+ inode->i_rdev = 0;
+ init_special_inode(inode, inode->i_mode,
+ huge_decode_dev(le64_to_cpu(fe->id1.dev1.bryce_i_rdev)));
+ }
+ }
+
+ atomic_set(&OCFS_I(inode)->ip_needs_verification, 0);
+bail:
+
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ if (drop_map)
+ ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->bryce_i_clusters));
+
+ return(status);
} /* ocfs_refresh_inode */
+/*
+ * ocfs_verify_update_inode()
+ */
+int ocfs_verify_update_inode (ocfs_super * osb, struct inode * inode)
+{
+ int status = 0;
+ struct buffer_head *fe_bh = NULL;
+ ocfs2_dinode *fe;
+ /* We are setting the oin Updated flag in the end. */
+ LOG_ENTRY ();
+
+ OCFS_ASSERT (inode);
+
+ if (OCFS_I(inode)->ip_blkno == 0) {
+ LOG_ERROR_ARGS("inode 0x%lu has zero blkno\n", inode->i_ino);
+ status = -EINVAL;
+ goto leave;
+ }
+
+ spin_lock(&OCFS_I(inode)->ip_lock);
+ if (INODE_DELETED(inode)) {
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+ LOG_TRACE_ARGS("Inode %llu was marked as deleted!",
+ OCFS_I(inode)->ip_blkno);
+ status = -ENOENT;
+ goto leave;
+ }
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &fe_bh,
+ OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+
+ status = ocfs_refresh_inode(inode, fe);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ ocfs_acquire_lockres_write(inode);
+ status = ocfs_update_lockres (osb, fe_bh, inode, 0);
+ ocfs_release_lockres_write(inode);
+
+ status = 0;
+leave:
+
+ if (fe_bh)
+ brelse(fe_bh);
+
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_verify_update_inode */
+
Modified: branches/endian/src/inode.h
===================================================================
--- branches/endian/src/inode.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/inode.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -30,8 +30,8 @@
int *err, int reada);
void ocfs_clear_inode(struct inode *inode);
void ocfs_delete_inode(struct inode *inode);
-struct inode *ocfs_iget(ocfs_super *osb, u64 feoff);
-struct inode *ocfs_ilookup(ocfs_super *osb, u64 feoff);
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
+struct inode *ocfs_ilookup(ocfs_super *osb, __u64 feoff);
int ocfs_inode_init_private(struct inode *inode);
int ocfs_inode_revalidate(struct dentry *dentry);
int ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
@@ -41,8 +41,9 @@
ssize_t ocfs_rw_direct(int rw, struct file *filp, char *buf,
size_t size, loff_t *offp);
void ocfs_sync_blockdev(struct super_block *sb);
-void ocfs_refresh_inode(struct inode *inode,
- ocfs2_dinode *fe);
+int ocfs_verify_update_inode(ocfs_super *osb, struct inode *inode);
+int ocfs_refresh_inode(struct inode *inode,
+ ocfs2_dinode *fe);
int ocfs_mark_inode_dirty(ocfs_journal_handle *handle,
struct inode *inode,
struct buffer_head *bh);
Modified: branches/endian/src/journal.c
===================================================================
--- branches/endian/src/journal.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/journal.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -35,14 +35,14 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
-#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
+#include "lockres.h"
#include "namei.h"
-#include "slot_map.h"
+#include "nm.h"
#include "super.h"
#include "util.h"
#include "vote.h"
@@ -55,42 +55,124 @@
spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED;
+static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num);
static int ocfs_force_read_journal(struct inode *inode);
static int ocfs_recover_node(struct _ocfs_super *osb, int node_num);
static int __ocfs_recovery_thread(void *arg);
static int ocfs_commit_cache (ocfs_super * osb);
static int ocfs_wait_on_mount(ocfs_super *osb);
-static int ocfs2_commit_thread_sleep(ocfs_super *osb);
-static void ocfs_handle_cleanup_locks(ocfs_journal *journal,
- ocfs_journal_handle *handle,
- int set_id);
+static void ocfs_handle_move_locks(ocfs_journal *journal,
+ ocfs_journal_handle *handle);
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal);
static void ocfs_commit_unstarted_handle(ocfs_journal_handle *handle);
-static int ocfs_journal_toggle_dirty(ocfs_super *osb,
- int dirty);
-static int ocfs2_trylock_journal(ocfs_super *osb,
- int slot_num);
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal)
+{
+ ocfs_journal_lock *lock = NULL;
+ ocfs_journal_lock *first = NULL; /* 1st update_oin release for
+ * an inode. */
+ struct inode *curr_inode = NULL;
+ struct list_head *p, *n;
+
+ /* 1st pass: optimize away all UPDATE_OIN messages into one.
+ * 2 types of UPDATE_OIN messages as of now: with size change
+ * (truncate / extend) or without size change. We will take
+ * *all* of them (regardless of type) and cram it into one
+ * message. */
+ curr_inode = NULL;
+ list_for_each_safe(p, n, &journal->checkpointing_locks) {
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+
+ if (lock->inode != curr_inode) {
+ /* Ok, new inode. */
+ first = NULL;
+
+ curr_inode = lock->inode;
+ }
+
+ /* if it's not an update oin then just continue */
+ if (!(lock->flags & FLAG_FILE_UPDATE_OIN))
+ continue;
+
+ if (!first) {
+ first = lock;
+ /* nothing else to do to the 1st update release. */
+ continue;
+ }
+
+ /* only the 1st update_oin lock stays -- the
+ * rest are either num_ident into it or
+ * dropped */
+ list_del(&lock->lock_list);
+ atomic_dec(&journal->num_chkpt_locks);
+
+ /* we only incement first->num_ident if the current
+ * lock has a size change AND first has recorded at
+ * least one size change (which can include
+ * itself). Otherwise we'll have an off by one as a
+ * first with no size change recorded will keep at
+ * least 1 reference for itself. */
+ if ((lock->flags & FLAG_FILE_EXTEND)
+ || (lock->flags & FLAG_FILE_TRUNCATE)) {
+ if ((first->flags & FLAG_FILE_EXTEND)
+ || (first->flags & FLAG_FILE_TRUNCATE))
+ first->num_ident++;
+ else
+ first->drop_holders++;
+ } else {
+ /* Ok, no size change on this particular lock,
+ * so we're discarding it without updating the
+ * num_ident value -- that's fine but we need
+ * to manually drop the lockres->lock_holders
+ * value on it's behalf */
+ first->drop_holders++;
+ }
+
+ /* record a size change in first if there was one. */
+ if (lock->flags & FLAG_FILE_EXTEND)
+ first->flags = first->flags | FLAG_FILE_EXTEND;
+ if (lock->flags & FLAG_FILE_TRUNCATE)
+ first->flags = first->flags | FLAG_FILE_TRUNCATE;
+
+ iput(lock->inode);
+
+ kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
+ }
+
+ return;
+}
+
/*
* ocfs_commit_cache()
+ *
+ * This is in journal.c for lack of a better place.
+ *
*/
static int ocfs_commit_cache(ocfs_super *osb)
{
- int status = 0;
- unsigned int flushed;
- unsigned long old_id;
+ int status = 0, tmpstat;
+ unsigned int flushed = 0;
+ unsigned int cmt_locks;
ocfs_journal * journal = NULL;
+ struct list_head *p, *n;
+ ocfs_journal_lock *lock = NULL;
+ struct inode *inode;
+ ocfs_inode_private *ip;
+ ocfs_lock_res *lockres;
LOG_ENTRY();
journal = osb->journal;
- /* Flush all pending commits and checkpoint the journal. */
+ /* Step 1: flush all pending commits and checkpoint the journal. */
down_write(&journal->trans_barrier);
if (atomic_read(&journal->num_trans) == 0) {
- up_write(&journal->trans_barrier);
LOG_TRACE_STR("No transactions for me to flush!");
- goto finally;
+ /* now, we may have locks left to drop even though no
+ * transactions are in the journal. */
+
+ goto drop_locks;
}
journal_lock_updates(journal->k_journal);
@@ -102,19 +184,111 @@
goto finally;
}
- old_id = ocfs_inc_trans_id(journal);
+ ocfs_inc_trans_id(journal);
+drop_locks:
flushed = atomic_read(&journal->num_trans);
atomic_set(&journal->num_trans, 0);
+
+ /* Step 2: Drop any locks acquired during transactions which
+ * have just been checkpointed. */
+ spin_lock(&journal->cmt_lock);
+
+ cmt_locks = atomic_read(&journal->num_cmt_locks);
+
+ atomic_add(atomic_read(&journal->num_cmt_locks),
+ &journal->num_chkpt_locks);
+ atomic_set(&journal->num_cmt_locks, 0);
+
+ /* move the locks off each inode onto the commit threads list. */
+ list_for_each_safe(p, n, &journal->committing_inodes) {
+ ip = list_entry(p, ocfs_inode_private, ip_j_inode);
+ inode = ip->ip_inode;
+
+ if (!list_empty(&OCFS_I(inode)->ip_pending_locks))
+ list_splice_init(&OCFS_I(inode)->ip_pending_locks,
+ (&journal->checkpointing_locks)->prev);
+
+ /* we can now remove the inode from the committing
+ * list. */
+ list_del_init(&OCFS_I(inode)->ip_j_inode);
+ }
+ osb->needs_flush = 0;
+
+ spin_unlock(&journal->cmt_lock);
+
+ /* TODO: Can we assert this anymore and move these lock
+ * releases back up?
+ *
+ * Once we've got cmt_lock, we can let
+ * transactions start again -- it should protect us against
+ * people mucking with the committed list... */
up_write(&journal->trans_barrier);
#ifdef VERBOSE_COMMIT_THREAD
- printk("(%u) commit_thread: flushed transaction %lu (%u handles)\n",
- current->pid, journal->trans_id, flushed);
+ if (flushed || cmt_locks)
+ printk("(%u) commit_thread: flushed %u transactions, "
+ "releasing %u locks\n", current->pid, flushed,
+ cmt_locks);
#endif
- ocfs2_kick_vote_thread(osb);
- wake_up(&journal->j_checkpointed);
+ ocfs_journal_optimize_lock_list(journal);
+#ifdef VERBOSE_COMMIT_THREAD
+ if (flushed || cmt_locks)
+ printk("(%u) commit_thread: after optimization, %u locks "
+ "to release\n", current->pid,
+ atomic_read(&journal->num_chkpt_locks));
+#endif
+
+ p = n = NULL;
+ list_for_each_safe(p, n, &journal->checkpointing_locks) {
+ if (!atomic_read(&journal->num_chkpt_locks))
+ BUG();
+
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+
+ list_del(&(lock->lock_list));
+
+ if (!lock->inode)
+ BUG();
+
+#if 0
+ /* enable this for tons of output, which will likely
+ * hang your box :) */
+ printk("commit_thread: release lock %u (inode %llu)\n",
+ atomic_read(&journal->num_chkpt_locks),
+ OCFS_I(lock->inode)->ip_blkno);
+#endif
+ tmpstat = 0;
+ if (!INODE_DELETED(lock->inode))
+ tmpstat = ocfs_release_lock_full(osb,
+ lock->type,
+ lock->flags,
+ lock->inode,
+ lock->num_ident);
+ else
+ LOG_ERROR_ARGS("commit_thread: Skipping release for "
+ "inode %llu!\n",
+ OCFS_I(lock->inode)->ip_blkno);
+ if (tmpstat < 0)
+ LOG_ERROR_ARGS("commit_thread: release_lock status is"
+ " %d releasing lock on inode %llu!\n",
+ tmpstat, OCFS_I(lock->inode)->ip_blkno);
+
+ if (lock->drop_holders) {
+ lockres = GET_INODE_LOCKRES(lock->inode);
+ ocfs_acquire_lockres_write(lock->inode);
+ OCFS_ASSERT(lockres->lock_holders >= lock->drop_holders);
+ lockres->lock_holders -= lock->drop_holders;
+ ocfs_release_lockres_write(lock->inode);
+ }
+
+ iput(lock->inode);
+
+ atomic_dec(&journal->num_chkpt_locks);
+ kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
+ }
+
finally:
LOG_EXIT_STATUS (status);
return status;
@@ -210,10 +384,10 @@
{
ocfs_journal_handle * retval = NULL;
- retval = kmalloc(sizeof(*retval), GFP_KERNEL);
+ retval = ocfs_malloc(sizeof(*retval));
if (!retval) {
LOG_ERROR_STR("Failed to allocate memory for journal handle!");
- return NULL;
+ return(NULL);
}
memset(retval, 0, sizeof(*retval));
@@ -221,12 +395,13 @@
retval->num_locks = 0;
retval->k_handle = NULL;
+ INIT_LIST_HEAD(&(retval->h_list));
INIT_LIST_HEAD(&(retval->locks));
INIT_LIST_HEAD(&(retval->inode_list));
retval->journal = osb->journal;
retval->osb = osb;
- return retval;
+ return(retval);
}
/* pass it NULL and it will allocate a new handle object for you. If
@@ -278,7 +453,7 @@
handle->flags |= OCFS_HANDLE_STARTED;
LOG_EXIT_PTR(handle);
- return handle;
+ return(handle);
done_free:
@@ -287,7 +462,7 @@
kfree(handle);
}
LOG_EXIT_PTR(NULL);
- return NULL;
+ return(NULL);
} /* ocfs_start_trans */
void ocfs_handle_add_inode(ocfs_journal_handle *handle, struct inode *inode)
@@ -307,6 +482,8 @@
OCFS_I(inode)->ip_handle = handle;
list_del(&(OCFS_I(inode)->ip_handle_list));
list_add_tail(&(OCFS_I(inode)->ip_handle_list), &(handle->inode_list));
+
+ return;
}
static void ocfs_handle_unlock_inodes(ocfs_journal_handle *handle)
@@ -325,6 +502,7 @@
up(&inode->i_sem);
iput(inode);
}
+ return;
}
/* This for loop is for debug purposes. Basically we want to check the
@@ -356,10 +534,14 @@
/* You are allowed to add journal locks before the transaction
* has started. */
osb = handle->osb;
- ocfs_handle_cleanup_locks(osb->journal, handle, 0);
+ ocfs_handle_move_locks(osb->journal, handle);
+ spin_lock(&osb->journal->cmt_lock);
+ osb->needs_flush = 1;
+ spin_unlock(&osb->journal->cmt_lock);
kfree(handle);
LOG_EXIT();
+ return;
}
/*
@@ -379,8 +561,7 @@
if (!(handle->flags & OCFS_HANDLE_STARTED)) {
ocfs_commit_unstarted_handle(handle);
- LOG_EXIT();
- return;
+ goto bail;
}
osb = handle->osb;
@@ -406,14 +587,20 @@
BUG();
}
- ocfs_handle_cleanup_locks(osb->journal, handle, 1);
+ ocfs_handle_move_locks(osb->journal, handle);
+ spin_lock(&osb->journal->cmt_lock);
+ osb->needs_flush = 1;
+ spin_unlock(&osb->journal->cmt_lock);
up_read(&journal->trans_barrier);
handle->k_handle = NULL; /* it's been free'd in journal_stop */
kfree(handle);
+bail:
LOG_EXIT();
+
+ return;
} /* ocfs_commit_trans */
/*
@@ -425,9 +612,8 @@
* during the transaction, so make sure they were taken *before*
* start_trans or we'll have ordering deadlocks.
*
- * WARNING2: Note that we do *not* drop trans_barrier here. This is
- * good because transaction ids haven't yet been recorded on the
- * cluster locks associated with this handle.
+ * This function would be alot simpler if we didn't have to worry
+ * about abort.
*/
int ocfs_extend_trans(ocfs_journal_handle *handle, int nblocks)
{
@@ -465,7 +651,7 @@
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -519,7 +705,7 @@
LOG_ERROR_ARGS("Error %d getting %d access to buffer!\n",
status, type);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_journal_access */
/*
@@ -543,68 +729,93 @@
(unsigned long long)bh->b_blocknr);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_journal_dirty */
-/* We always assume you're adding a metadata lock at level 'ex' */
-int ocfs_handle_add_lock(ocfs_journal_handle *handle,
- struct inode *inode)
+void ocfs_handle_add_lock(ocfs_journal_handle *handle, __u32 type,
+ __u32 flags, struct inode *inode)
{
- int status;
ocfs_journal_lock *lock;
OCFS_ASSERT(inode);
+
+ LOG_ENTRY_ARGS("(inode=%llu, type=%u, flags=%u)\n",
+ OCFS_I(inode)->ip_blkno, type, flags);
+
lock = kmem_cache_alloc(OcfsGlobalCtxt.lock_cache, GFP_NOFS);
- if (!lock) {
- status = -ENOMEM;
+ if (lock == NULL) {
+ LOG_ERROR_STR("Out of memory -- cannot add lock to release.");
LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
+
+ BUG();
}
+ lock->type = type;
+ lock->flags = flags;
+ lock->inode = inode;
+
+ /* stuff for commit thread optimization. */
+ lock->num_ident = 1;
+ /* this is for *additional* decrements of lock_holders, not
+ * the one given by ocfs_release_lock... */
+ lock->drop_holders = 0;
+
if (!igrab(inode))
BUG();
- lock->jl_inode = inode;
- list_add_tail(&(lock->jl_lock_list), &(handle->locks));
+ list_add_tail(&(lock->lock_list), &(handle->locks));
handle->num_locks++;
+ spin_lock(&handle->journal->cmt_lock);
+ atomic_inc(&handle->journal->num_cmt_locks);
+ spin_unlock(&handle->journal->cmt_lock);
- status = 0;
-bail:
- LOG_EXIT_STATUS(status);
- return status;
+ LOG_EXIT();
+ return;
}
-static void ocfs_handle_cleanup_locks(ocfs_journal *journal,
- ocfs_journal_handle *handle,
- int set_id)
+/* move the locks off a journal handle and onto the
+ * inode->ip_pending_locks. makes sure the inodes are on
+ * journal->committing_inodes so that the commit thread can get them
+ * after checkpoint.
+ *
+ * You want to do this before dropping trans_barrier to prevent the commit
+ * thread from missing the locks.
+ *
+ * TODO: When we get rid of the last checkpointed transactions, we can
+ * just put locks right on the inode in ocfs_handle_add_lock...
+ */
+static void ocfs_handle_move_locks(ocfs_journal *journal,
+ ocfs_journal_handle *handle)
{
struct list_head *p, *n;
ocfs_journal_lock *lock;
struct inode *inode;
list_for_each_safe(p, n, &(handle->locks)) {
- lock = list_entry(p, ocfs_journal_lock, jl_lock_list);
- list_del(&lock->jl_lock_list);
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+ list_del(&lock->lock_list);
handle->num_locks--;
- inode = lock->jl_inode;
- if (set_id)
- ocfs_set_inode_lock_trans(journal, inode);
- ocfs2_meta_unlock(inode, 1);
- if (atomic_read(&inode->i_count) == 1)
- LOG_ERROR_ARGS("Inode %llu, I'm doing a last iput "
- "for!", OCFS_I(inode)->ip_blkno);
- iput(inode);
- kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
+ inode = lock->inode;
+
+ spin_lock(&journal->cmt_lock);
+ /* add the lock to the inode */
+ list_add_tail(&lock->lock_list,
+ &OCFS_I(inode)->ip_pending_locks);
+ /* and make sure the inode is on the journals list */
+ if (list_empty(&OCFS_I(inode)->ip_j_inode))
+ list_add_tail(&OCFS_I(inode)->ip_j_inode,
+ &journal->committing_inodes);
+ spin_unlock(&journal->cmt_lock);
}
+ return;
}
#define OCFS_DEFAULT_COMMIT_INTERVAL (HZ * 5)
/*
* Setup the journal using the journal system file
*/
-int ocfs_journal_init(ocfs_super *osb,
- int *dirty)
+int ocfs_journal_init(ocfs_super *osb)
{
int status = -1;
struct inode *inode = NULL; /* the journal inode */
@@ -617,9 +828,10 @@
if (!osb)
BUG();
+ spin_lock_init(&(osb->journal->cmt_lock));
+
/* already have the inode for our journal */
- inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
- osb->slot_num);
+ inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, osb->node_num);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -635,7 +847,8 @@
SET_INODE_JOURNAL(inode);
- status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
+ 0, &bh, inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STR("Could not get lock on journal!");
@@ -643,26 +856,26 @@
}
fe = (ocfs2_dinode *) bh->b_data;
- if (fe->i_size < OCFS2_MIN_JOURNAL_SIZE) {
+ if (le64_to_cpu(fe->bryce_i_size) < OCFS2_MIN_JOURNAL_SIZE) {
LOG_ERROR_ARGS("Journal file size (%llu) is too small!",
- fe->i_size);
+ le64_to_cpu(fe->bryce_i_size));
status = -EINVAL;
goto done;
}
- LOG_TRACE_ARGS("fe->i_size = %llu\n", fe->i_size);
- LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
- LOG_TRACE_ARGS("fe->i_blkno = %llu\n", fe->i_blkno);
+ LOG_TRACE_ARGS("fe->i_size = %llu\n", le64_to_cpu(fe->bryce_i_size));
+ LOG_TRACE_ARGS("fe->i_clusters = %u\n", le32_to_cpu(fe->bryce_i_clusters));
+ LOG_TRACE_ARGS("fe->i_blkno = %llu\n", le64_to_cpu(fe->bryce_i_blkno));
LOG_TRACE_ARGS("inode->ip_clusters = %u\n",
OCFS_I(inode)->ip_clusters);
- if (OCFS_I(inode)->ip_clusters != fe->i_clusters)
+ if (OCFS_I(inode)->ip_clusters != le32_to_cpu(fe->bryce_i_clusters))
LOG_ERROR_ARGS("inode and fe alloc sizes differ! "
"(%u != %u", OCFS_I(inode)->ip_clusters,
- fe->i_clusters);
- if (inode->i_size != fe->i_size)
+ le32_to_cpu(fe->bryce_i_clusters));
+ if (inode->i_size != le64_to_cpu(fe->bryce_i_size))
LOG_ERROR_ARGS("inode and fe i_size's differ! (%llu != %llu",
- inode->i_size, fe->i_size);
+ inode->i_size, le64_to_cpu(fe->bryce_i_size));
OCFS_I(inode)->ip_open_cnt++;
@@ -680,18 +893,21 @@
/* yay, pass the proper info back to our journal structure. */
osb->journal->osb = osb;
+ /* eventually this will be a value passed into us */
+ osb->journal->node_num = osb->node_num;
osb->journal->k_journal = k_journal;
osb->journal->k_inode = inode;
osb->journal->version = OCFS_JOURNAL_CURRENT_VERSION;
osb->journal->lockbh = bh;
atomic_set(&(osb->journal->num_trans), 0);
+ atomic_set(&(osb->journal->num_cmt_locks), 0);
+ atomic_set(&(osb->journal->num_chkpt_locks), 0);
init_rwsem(&(osb->journal->trans_barrier));
- init_waitqueue_head(&osb->journal->j_checkpointed);
osb->journal->state = OCFS_JOURNAL_LOADED;
osb->journal->trans_id = (unsigned long) 1;
+ INIT_LIST_HEAD(&(osb->journal->committing_inodes));
+ INIT_LIST_HEAD(&(osb->journal->checkpointing_locks));
- *dirty = (le32_to_cpu(fe->id1.journal1.ij_flags) &
- OCFS2_JOURNAL_DIRTY_FL);
status = 0;
done:
if (status < 0) {
@@ -705,40 +921,11 @@
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_journal_init */
-static int ocfs_journal_toggle_dirty(ocfs_super *osb,
- int dirty)
-{
- int status;
- unsigned int flags;
- ocfs_journal * journal = osb->journal;
- struct buffer_head *bh = journal->lockbh;
- ocfs2_dinode *fe;
-
- LOG_ENTRY();
-
- fe = (ocfs2_dinode *) bh->b_data;
- OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
-
- flags = le32_to_cpu(fe->id1.journal1.ij_flags);
- if (dirty)
- flags |= OCFS2_JOURNAL_DIRTY_FL;
- else
- flags &= ~OCFS2_JOURNAL_DIRTY_FL;
- fe->id1.journal1.ij_flags = cpu_to_le32(flags);
-
- status = ocfs_write_block(osb, bh, journal->k_inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
- LOG_EXIT_STATUS(status);
- return status;
-}
-
/*
- if the journal has been kmalloc'd it needs to be freed after this call.
+ if the journal has been ocfs_malloc'd it needs to be freed after this call.
*/
void ocfs_journal_shutdown(ocfs_super *osb)
{
@@ -761,7 +948,7 @@
}
/* need to inc inode use count as journal_destroy will iput. */
- if (!igrab(inode))
+ if (ocfs_inc_icount(inode) < 0)
BUG();
num_running_trans = atomic_read(&(osb->journal->num_trans));
@@ -781,8 +968,8 @@
if (osb->commit && osb->commit->c_task) {
/* Wait for the commit thread */
LOG_TRACE_STR ("Waiting for ocfs2commit to exit....");
- atomic_set (&osb->needs_checkpoint, 1);
- wake_up (&osb->checkpoint_event);
+ atomic_set (&osb->flush_event_woken, 1);
+ wake_up (&osb->flush_event);
wait_for_completion(&osb->commit->c_complete);
osb->commit->c_task = NULL;
kfree(osb->commit);
@@ -790,17 +977,15 @@
OCFS_ASSERT(atomic_read(&(osb->journal->num_trans)) == 0);
- status = ocfs_journal_toggle_dirty(osb, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
/* Shutdown the kernel journal system */
journal_destroy(journal->k_journal);
OCFS_I(inode)->ip_open_cnt--;
/* unlock our journal */
- ocfs2_meta_unlock(inode, 1);
+ status = ocfs_release_lock (osb, OCFS_LKM_EXMODE, 0, inode);
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
brelse (journal->lockbh);
journal->lockbh = NULL;
@@ -812,13 +997,13 @@
if (inode)
iput(inode);
LOG_EXIT();
+ return;
}
int ocfs_journal_load(ocfs_journal *journal)
{
int status = 0;
int olderr = 0;
- int child_pid;
ocfs_super *osb;
LOG_ENTRY();
@@ -841,33 +1026,9 @@
journal_clear_err(journal->k_journal);
}
- status = ocfs_journal_toggle_dirty(osb, 1);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto done;
- }
-
- /* Launch the commit thread */
- osb->commit = kmalloc(sizeof(ocfs_commit_task), GFP_KERNEL);
- if (osb->commit == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto done;
- }
- memset(osb->commit, 0, sizeof(ocfs_commit_task));
- child_pid = kernel_thread (ocfs_commit_thread, osb,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (child_pid < 0) {
- LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
- child_pid);
- status = child_pid;
- goto done;
- } else {
- init_completion (&osb->commit->c_complete);
- }
-
done:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
@@ -883,18 +1044,9 @@
BUG();
status = journal_wipe(journal->k_journal, full);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- status = ocfs_journal_toggle_dirty(journal->osb, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
-bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -961,139 +1113,119 @@
if (bhs[i])
brelse(bhs[i]);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
+struct ocfs_recover_arg {
+ ocfs_super *osb;
+ int node_num;
+};
+
static int __ocfs_recovery_thread(void *arg)
{
- ocfs_super *osb = arg;
+ struct ocfs_recover_arg *recover_arg = arg;
+ ocfs_super *osb = recover_arg->osb;
+ int node_num = recover_arg->node_num;
int status = 0;
- int node_num;
char proc[16];
- LOG_ENTRY();
+ LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n", node_num,
+ osb->node_num);
sprintf (proc, "ocfs2rec-%d", osb->osb_id);
ocfs_daemonize (proc, strlen(proc), 0);
- status = ocfs_wait_on_mount(osb);
- if (status < 0) {
- if (status == -EBUSY)
- status = 0;
- goto bail;
- }
+#ifdef HAVE_NPTL
+ spin_lock_irq (&current->sighand->siglock);
+ sigfillset(&current->blocked);
+ recalc_sigpending();
+ spin_unlock_irq (&current->sighand->siglock);
+#else
+ spin_lock_irq(&current->sigmask_lock);
+ sigfillset(&current->blocked);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+#endif
-restart:
- status = ocfs2_super_lock(osb, 1);
- if (status < 0) {
+ status = ocfs_recover_node(osb, node_num);
+ if (status < 0)
LOG_ERROR_STATUS(status);
- goto bail;
- }
- while(!ocfs_node_map_is_empty(osb, &osb->recovery_map)) {
- node_num = ocfs_node_map_first_set_bit(osb,
- &osb->recovery_map);
- if (node_num == OCFS_INVALID_NODE_NUM) {
- LOG_TRACE_ARGS("Out of nodes to recover.\n");
- break;
- }
+ LOG_EXIT_STATUS(status);
- ocfs_recovery_map_clear(osb, node_num);
- /* TODO: Figure out how we're going to save all the
- * local alloc stuff for after recovery on all nodes
- * is complete? */
- status = ocfs_recover_node(osb, node_num);
- if (status < 0) {
- printk("ocfs2: Error %d recovering node %d on device "
- "(%u,%u)!\n", status, node_num,
- MAJOR(osb->sb->s_dev),MINOR(osb->sb->s_dev));
- printk("ocfs2: Volume requires unmount.\n");
- continue;
- }
- }
- ocfs2_super_unlock(osb, 1);
+ kfree(arg);
+ return status;
+}
-bail:
- down(&osb->recovery_lock);
- if (!ocfs_node_map_is_empty(osb, &osb->recovery_map)) {
- up(&osb->recovery_lock);
- goto restart;
- }
+void ocfs_recovery_thread(ocfs_super *osb, int node_num)
+{
+ struct ocfs_recover_arg *arg;
- osb->recovery_launched = 0;
- wake_up(&osb->recovery_event);
+ LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n", node_num,
+ osb->node_num);
- up(&osb->recovery_lock);
+ arg = ocfs_malloc(sizeof(struct ocfs_recover_arg));
+ if (arg == NULL) {
+ LOG_ERROR_STATUS(-ENOMEM);
+ goto done;
+ }
- LOG_EXIT_STATUS(status);
- return status;
-}
+ arg->osb = osb;
+ arg->node_num = node_num;
-void ocfs_recovery_thread(ocfs_super *osb, int node_num)
-{
- LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n",
- node_num, osb->node_num);
+ /* atomic_inc this here and let recover_vol dec it when
+ * done. We do it this way to avoid races with umount. */
+ atomic_inc(&osb->num_recovery_threads);
- down(&osb->recovery_lock);
- if (!osb->disable_recovery) {
- /* People waiting on recovery will wait on
- * the recovery map to empty. */
- ocfs_recovery_map_set(osb, node_num);
+ LOG_TRACE_STR("starting recovery thread...");
- LOG_TRACE_STR("starting recovery thread...");
+ kernel_thread(__ocfs_recovery_thread, arg,
+ CLONE_VM | CLONE_FS | CLONE_FILES);
- if (!osb->recovery_launched) {
- kernel_thread(__ocfs_recovery_thread, osb,
- CLONE_VM | CLONE_FS | CLONE_FILES);
- osb->recovery_launched = 1;
- }
- }
- up(&osb->recovery_lock);
- wake_up(&osb->recovery_event);
-
+done:
LOG_EXIT();
+ return;
}
static int ocfs_recover_node(ocfs_super *osb, int node_num)
{
- int status = 0;
- int slot_num;
- unsigned int flags;
+ int status = -1;
+ int tmpstat;
ocfs2_dinode *fe;
ocfs2_dinode *local_alloc = NULL;
struct inode *inode = NULL;
journal_t *k_journal = NULL;
struct buffer_head *bh = NULL;
ocfs_journal * journal = NULL;
- int got_lock = 0, clean_orphans = 0;
- ocfs2_slot_info *si = osb->slot_info;
+ int recovery_lock = 0, got_lock = 0, clean_orphans = 0;
- LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n",
- node_num, osb->node_num);
+ LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n", node_num,
+ osb->node_num);
- printk("ocfs2_recover_node: checking node %d\n", node_num);
+ if (!osb || (node_num >= osb->max_nodes)) {
+ LOG_ERROR_STATUS (status = -EINVAL);
+ goto done;
+ }
- /* Should not ever be called to recover ourselves -- in that
- * case we should've called ocfs_journal_load instead. */
- if (osb->node_num == node_num)
- BUG();
+ status = ocfs_wait_on_mount(osb);
+ if (status < 0) {
+ if (status == -EBUSY)
+ status = 0;
+ goto done;
+ }
+ journal = osb->journal;
- ocfs2_update_slot_info(si);
- slot_num = ocfs2_node_num_to_slot(si, node_num);
- if (slot_num == OCFS_INVALID_NODE_NUM) {
- printk("ocfs2_recover_node: no slot for this node, so no "
- "recovery required.\n");
+ /* Grab the local recovery resource to ensure no other thread
+ * comes in from this node for recovery */
+ down(&(osb->recovery_lock));
+ recovery_lock = 1;
+ if (osb->disable_recovery) {
+ LOG_TRACE_STR("Shutting down so skipping reovery.");
goto done;
}
- printk("ocfs2_recover_node: node %d was using slot %d\n", node_num,
- slot_num);
-
- journal = osb->journal;
-
/* Ok, look up the inode for our journal */
- inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
- slot_num);
+ inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, node_num);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -1109,10 +1241,16 @@
SET_INODE_JOURNAL(inode);
- status = ocfs2_meta_lock_flags(inode, NULL, &bh, 1,
- OCFS2_META_LOCK_RECOVERY);
+ /* Should not ever be called to recover ourselves -- in that
+ * case we should've called ocfs_journal_load instead. */
+ if (osb->node_num == node_num)
+ BUG();
+
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_FILE_RECOVERY,
+ &bh, inode);
+
if (status < 0) {
- LOG_TRACE_ARGS("status returned from ocfs2_meta_lock=%d\n",
+ LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
status);
if (status != -EINTR)
LOG_ERROR_STR("Could not lock journal!");
@@ -1120,21 +1258,20 @@
}
got_lock = 1;
- fe = (ocfs2_dinode *) bh->b_data;
-
- if (!(le32_to_cpu(fe->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL)) {
+ /* check if that nodes publish sector has been reset (mounted
+ * is set false) if so, we can unlock and quit. otherwise we
+ * should recover. */
+ if (!ocfs_publish_get_mount_state(osb, node_num)) {
LOG_TRACE_ARGS("No recovery required for node %d\n", node_num);
- printk("ocfs2_recover_node: No recovery required for node "
- "%d\n", node_num);
+ status = 0;
goto clear_node;
}
-
- printk("ocfs2: Recovering node %d from slot %d on device (%u,%u)\n",
- node_num, slot_num, MAJOR(osb->sb->s_dev),
- MINOR(osb->sb->s_dev));
+ printk("ocfs2: Recovering node %d from device (%u,%u)\n", node_num,
+ MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
clean_orphans = 1;
- OCFS_I(inode)->ip_clusters = fe->i_clusters;
+ fe = (ocfs2_dinode *) bh->b_data;
+ OCFS_I(inode)->ip_clusters = le32_to_cpu(fe->bryce_i_clusters);
status = ocfs_force_read_journal(inode);
if (status < 0) {
@@ -1154,7 +1291,7 @@
status = journal_load(k_journal);
if (status < 0) {
LOG_ERROR_STATUS(status);
- if (!igrab(inode))
+ if (ocfs_inc_icount(inode) < 0)
BUG();
journal_destroy(k_journal);
goto done;
@@ -1168,157 +1305,74 @@
if (status < 0)
LOG_ERROR_STATUS(status);
- /* mark the node clean. */
- flags = le32_to_cpu(fe->id1.journal1.ij_flags) & ~OCFS2_JOURNAL_DIRTY_FL;
- fe->id1.journal1.ij_flags = cpu_to_le32(flags);
-
- status = ocfs_write_block(osb, bh, inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
- if (!igrab(inode))
+ if (ocfs_inc_icount(inode) < 0)
BUG();
/* shutdown the journal */
journal_destroy(k_journal);
-#warning "we can't complete local alloc recovery in this function!"
/* recover his local alloc file, AFTER recovering his journal... */
- status = ocfs_begin_local_alloc_recovery(osb, slot_num, &local_alloc);
+ status = ocfs_begin_local_alloc_recovery(osb, node_num, &local_alloc);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto done;
}
+ /* clear the publish sector (mark it unmounted and clean) */
+ status = ocfs_reset_publish(osb, node_num);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
status = 0;
clear_node:
- ocfs2_clear_slot(si, slot_num);
- status = ocfs2_update_disk_slots(osb, si);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ ocfs_recovery_map_clear(osb, node_num);
+ ocfs_recover_oin_locks(osb, node_num);
done:
+ if (recovery_lock)
+ up(&(osb->recovery_lock));
+
/* drop the lock on this nodes journal */
- if (got_lock)
- ocfs2_meta_unlock(inode, 1);
+ if (got_lock) {
+ tmpstat = ocfs_release_lock(osb, OCFS_LKM_EXMODE,
+ FLAG_FILE_RECOVERY,
+ inode);
+ if (tmpstat < 0)
+ LOG_ERROR_STATUS(tmpstat);
+ }
if (inode)
iput(inode);
if (bh)
brelse(bh);
-#if 0
+
if (local_alloc && !status) {
tmpstat = ocfs_complete_local_alloc_recovery(osb, local_alloc);
if (tmpstat < 0)
LOG_ERROR_STATUS(tmpstat);
}
-#endif
+
if (local_alloc)
kfree(local_alloc);
-#if 0
+
if (clean_orphans && !status) {
tmpstat = ocfs_recover_orphans(osb);
if (tmpstat < 0)
LOG_ERROR_STATUS(tmpstat);
}
-#endif
- LOG_EXIT_STATUS(status);
- return status;
-}
+ atomic_dec(&osb->num_recovery_threads);
-/* Test node liveness by trylocking his journal. If we get the lock,
- * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
- * still alive (we couldn't get the lock) and < 0 on error. */
-static int ocfs2_trylock_journal(ocfs_super *osb,
- int slot_num)
-{
- int status, flags;
- struct inode *inode = NULL;
-
- inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
- slot_num);
- if (inode == NULL) {
- LOG_ERROR_STR("access error");
- status = -EACCES;
- goto bail;
- }
- if (is_bad_inode (inode)) {
- LOG_ERROR_STR("access error (bad inode)");
- iput (inode);
- inode = NULL;
- status = -EACCES;
- goto bail;
- }
- SET_INODE_JOURNAL(inode);
-
- flags = OCFS2_META_LOCK_RECOVERY|OCFS2_META_LOCK_NOQUEUE;
- status = ocfs2_meta_lock_flags(inode, NULL, NULL, 1, flags);
- if (status < 0) {
- if (status != -EAGAIN || status != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- ocfs2_meta_unlock(inode, 1);
-bail:
- if (inode)
- iput(inode);
-
- return status;
-}
-
-/* Call this underneath ocfs2_super_lock. It also assumes that the
- * slot info struct has been updated from disk. */
-int ocfs2_mark_dead_nodes(ocfs_super *osb)
-{
- int status, i, node_num;
- ocfs2_slot_info *si = osb->slot_info;
-
- /* This is called with the super block cluster lock, so we
- * know that the slot map can't change underneath us. */
-
- spin_lock(&si->si_lock);
- for(i = 0; i < si->si_num_slots; i++) {
- node_num = si->si_global_node_nums[i];
- if (i == osb->slot_num)
- continue;
- if (node_num == OCFS_INVALID_NODE_NUM)
- continue;
- if (ocfs_node_map_test_bit(osb, &osb->recovery_map, node_num))
- continue;
- spin_unlock(&si->si_lock);
-
- /* Ok, we have a slot occupied by another node which
- * is not in the recovery map. We trylock his journal
- * file here to test if he's alive. */
- status = ocfs2_trylock_journal(osb, i);
- if (!status) {
- /* Since we're called from mount, we know that
- * the recovery thread can't race us on
- * setting / checking the recovery bits. */
- ocfs_recovery_thread(osb, node_num);
- } else if ((status < 0) && (status != -EAGAIN)) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- spin_lock(&si->si_lock);
- }
- spin_unlock(&si->si_lock);
-
- status = 0;
-bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
int ocfs_recover_orphans(ocfs_super *osb)
{
int status = 0;
int have_disk_lock = 0;
+ int tmpstat;
struct inode *inode = NULL;
struct inode *iter;
struct inode *orphan_dir_inode = NULL;
@@ -1327,6 +1381,8 @@
struct ocfs2_dir_entry *de;
struct super_block *sb = osb->sb;
+ down(&osb->orphan_recovery_lock);
+
orphan_dir_inode = ocfs_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE,
-1);
@@ -1337,7 +1393,7 @@
}
down(&orphan_dir_inode->i_sem);
- status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
+ status = ocfs_acquire_lock_ro(osb, orphan_dir_inode);
if (status < 0) {
up(&orphan_dir_inode->i_sem);
LOG_ERROR_STATUS(status);
@@ -1347,7 +1403,7 @@
offset = 0;
iter = NULL;
- while(offset < i_size_read(orphan_dir_inode)) {
+ while(offset < orphan_dir_inode->i_size) {
blk = offset >> sb->s_blocksize_bits;
bh = ocfs_bread(orphan_dir_inode, blk, &status, 0);
@@ -1362,7 +1418,7 @@
}
local = 0;
- while(offset < i_size_read(orphan_dir_inode)
+ while(offset < orphan_dir_inode->i_size
&& local < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + local);
@@ -1375,11 +1431,11 @@
goto bail;
}
- local += le16_to_cpu(de->rec_len);
- offset += le16_to_cpu(de->rec_len);
+ local += le16_to_cpu(de->bryce_rec_len);
+ offset += le16_to_cpu(de->bryce_rec_len);
/* I guess we silently fail on no inode? */
- if (!le64_to_cpu(de->inode))
+ if (!le64_to_cpu(de->bryce_inode))
continue;
if (de->file_type > OCFS2_FT_MAX) {
LOG_ERROR_ARGS("block %llu contains invalid "
@@ -1387,8 +1443,8 @@
"%u, name_len = %u, file_type "
"= %u, name='%*s'\n",
(unsigned long long) bh->b_blocknr,
- le64_to_cpu(de->inode),
- le16_to_cpu(de->rec_len),
+ cpu_to_le64(de->bryce_inode),
+ cpu_to_le16(de->bryce_rec_len),
de->name_len,
de->file_type,
de->name_len,
@@ -1400,7 +1456,7 @@
if (de->name_len == 2 && !strncmp("..", de->name, 2))
continue;
- iter = ocfs_iget(osb, de->inode);
+ iter = ocfs_iget(osb, le64_to_cpu(de->bryce_inode));
if (!iter)
continue;
@@ -1411,8 +1467,12 @@
}
up(&orphan_dir_inode->i_sem);
- ocfs2_meta_unlock(orphan_dir_inode, 0);
+ status = ocfs_release_lock_ro(osb, orphan_dir_inode);
have_disk_lock = 0;
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
iput(orphan_dir_inode);
orphan_dir_inode = NULL;
@@ -1424,13 +1484,18 @@
}
bail:
- if (have_disk_lock)
- ocfs2_meta_unlock(orphan_dir_inode, 0);
+ up(&osb->orphan_recovery_lock);
+ if (have_disk_lock) {
+ tmpstat = ocfs_release_lock_ro(osb, orphan_dir_inode);
+ if (tmpstat < 0)
+ LOG_ERROR_STATUS(tmpstat);
+ }
+
if (orphan_dir_inode)
iput(orphan_dir_inode);
- return status;
+ return(status);
}
static int ocfs_wait_on_mount(ocfs_super *osb)
@@ -1455,40 +1520,57 @@
goto retry;
}
-static int ocfs2_commit_thread_sleep(ocfs_super *osb)
+/*
+ * ocfs_reset_publish()
+ *
+ *
+ * called by: old_ocfs_recover_node()
+ *
+ * NOTE: This function is unused. I keep it here because it may be
+ * useful in the future. --Mark (Sept. 22, 2003)
+ */
+static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num)
{
- int status;
- signed long timeout = OCFS_CHECKPOINT_INTERVAL;
- DECLARE_WAITQUEUE(wait, current);
+ int status = 0;
+ ocfs_publish *publish = NULL;
+ struct buffer_head *publish_bh = NULL;
- if (atomic_read(&osb->needs_checkpoint))
- return 0;
+ LOG_ENTRY_ARGS("(0x%p, %llu)\n", osb, node_num);
- status = 0;
- add_wait_queue(&osb->checkpoint_event, &wait);
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* take a lock on the publish sector */
+ down (&(osb->publish_lock));
- if (atomic_read(&osb->needs_checkpoint))
- break;
+ /* Read the publish sector */
+ status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
+ &publish_bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+
+ publish = (ocfs_publish *) publish_bh->b_data;
- if (!signal_pending(current)) {
- timeout = schedule_timeout(timeout);
- if (!timeout) {
- status = -ETIMEDOUT;
- break;
- }
- continue;
- }
- status = -EINTR;
- break;
+ publish->dirty = 0;
+ publish->mounted = 0;
+
+ /* Write the publish sector */
+ status = ocfs_write_block(osb, publish_bh, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
}
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&osb->checkpoint_event, &wait);
- return status;
-}
+finally:
+ /* unlock it */
+ up (&(osb->publish_lock));
+ if (publish_bh)
+ brelse(publish_bh);
+
+ LOG_EXIT_STATUS (status);
+ return (status);
+} /* ocfs_reset_publish */
+
int ocfs_commit_thread(void *arg)
{
int status = 0, misses = 0;
@@ -1505,12 +1587,15 @@
misses = 0;
while (1) {
- status = ocfs2_commit_thread_sleep(osb);
- atomic_set (&osb->needs_checkpoint, 0);
+ status = ocfs_wait (osb->flush_event,
+ atomic_read (&osb->flush_event_woken),
+ OCFS_CHECKPOINT_INTERVAL);
+
+ atomic_set (&osb->flush_event_woken, 0);
switch (status) {
case -ETIMEDOUT:
- LOG_TRACE_STR("timed out");
+ LOG_TRACE_STR("FLUSH_EVENT: timed out");
break;
case -EINTR:
LOG_ERROR_STR("Commit thread got a signal!");
@@ -1522,10 +1607,10 @@
}
break;
case 0:
- LOG_TRACE_STR("woken\n");
+ LOG_TRACE_STR("FLUSH_EVENT: woken!!!");
break;
default:
- LOG_ERROR_STR("invalid status!\n");
+ LOG_TRACE_STR("FLUSH_EVENT: ??????");
break;
}
@@ -1542,12 +1627,15 @@
/* we can trust num_trans here because we're
* in shutdown and nobody other than ourselves
* should be able to start more. */
- if (atomic_read(&journal->num_trans) == 0)
+ if ((atomic_read(&journal->num_trans) == 0)
+ && (atomic_read(&journal->num_cmt_locks) == 0))
break;
#ifdef VERBOSE_COMMIT_THREAD
- printk("(%u) commit_thread: %u transactions pending "
- "on shutdown\n",
- current->pid, atomic_read(&journal->num_trans));
+ printk("(%u) commit_thread: %u transactions, %u locks"
+ "pending on shutdown\n",
+ current->pid,
+ atomic_read(&journal->num_trans),
+ atomic_read(&journal->num_cmt_locks));
#endif
goto skip_sleep;
}
@@ -1556,3 +1644,4 @@
complete (&(commit->c_complete));
return 0;
}
+
Modified: branches/endian/src/localalloc.c
===================================================================
--- branches/endian/src/localalloc.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/localalloc.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -31,14 +31,13 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/bitops.h>
#include "ocfs_log.h"
#include "ocfs.h"
#include "ocfs2.h"
#include "alloc.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "localalloc.h"
#include "suballoc.h"
#include "sysfile.h"
@@ -54,11 +53,11 @@
static inline int ocfs_local_alloc_window_bits(ocfs_super *osb);
-static u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc);
+static __u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc);
static int ocfs_local_alloc_find_clear_bits(ocfs_super *osb,
ocfs2_dinode *alloc,
- u32 numbits);
+ __u32 numbits);
static void ocfs_clear_local_alloc(ocfs2_dinode *alloc);
@@ -117,7 +116,7 @@
numbits = 64;
break;
}
- return numbits;
+ return(numbits);
} /* ocfs_local_alloc_window_bits */
int ocfs_alloc_should_use_local(ocfs_super *osb, u64 bits)
@@ -137,7 +136,7 @@
int status = 0;
ocfs2_dinode *alloc = NULL;
struct buffer_head *alloc_bh = NULL;
- u32 num_used;
+ __u32 num_used;
struct inode *inode = NULL;
LOG_ENTRY();
@@ -148,7 +147,7 @@
/* read the alloc off disk */
inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
- osb->slot_num);
+ osb->node_num);
if (!inode) {
LOG_ERROR_STATUS(status=-EINVAL);
goto bail;
@@ -162,7 +161,7 @@
alloc = (ocfs2_dinode *) alloc_bh->b_data;
- if (!(alloc->i_flags & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
+ if (!(le32_to_cpu(alloc->bryce_i_flags) & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
LOG_ERROR_ARGS("Invalid local alloc inode, %llu\n",
OCFS_I(inode)->ip_blkno);
status = -EINVAL;
@@ -176,23 +175,23 @@
* we load it so there should be no bits used from the main
* bitmap. */
if (num_used
- || alloc->id1.bitmap1.i_used
- || alloc->id1.bitmap1.i_total
- || LOCAL_ALLOC(alloc)->la_bm_off) {
+ || le32_to_cpu(alloc->id1.bitmap1.bryce_i_used)
+ || le32_to_cpu(alloc->id1.bitmap1.bryce_i_total)
+ || le32_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_bm_off)) {
LOG_ERROR_ARGS("Local alloc hasn't been recovered!\n"
"found = %u, set = %u, taken = %u, off = %u\n",
num_used,
- le32_to_cpu(alloc->id1.bitmap1.i_used),
- le32_to_cpu(alloc->id1.bitmap1.i_total),
- LOCAL_ALLOC(alloc)->la_bm_off);
+ le32_to_cpu(alloc->id1.bitmap1.bryce_i_used),
+ le32_to_cpu(alloc->id1.bitmap1.bryce_i_total),
+ le32_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_bm_off));
status = -EBUSY;
goto bail;
}
- if (!LOCAL_ALLOC(alloc)->la_size ||
- (LOCAL_ALLOC(alloc)->la_size > ocfs2_local_alloc_size(inode->i_sb))) {
+ if (!le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size) ||
+ (le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
LOG_ERROR_ARGS("Local alloc size is invalid (la_size = %u)\n",
- LOCAL_ALLOC(alloc)->la_size);
+ le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size));
status = -EINVAL;
goto bail;
}
@@ -208,7 +207,7 @@
iput(inode);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_load_local_alloc */
/*
@@ -237,7 +236,7 @@
local_alloc_inode =
ocfs_get_system_file_inode(osb,
LOCAL_ALLOC_SYSTEM_INODE,
- osb->slot_num);
+ osb->node_num);
if (!local_alloc_inode) {
status = -ENOENT;
LOG_ERROR_STATUS(status);
@@ -272,12 +271,15 @@
}
ocfs_handle_add_inode(handle, main_bm_inode);
- status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &main_bm_bh, main_bm_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, main_bm_inode);
/* WINDOW_MOVE_CREDITS is a bit heavy... */
handle = ocfs_start_trans(osb, handle, OCFS_WINDOW_MOVE_CREDITS);
@@ -336,6 +338,7 @@
kfree(alloc_copy);
LOG_EXIT();
+ return;
} /* ocfs_shutdown_local_alloc */
/*
@@ -348,7 +351,7 @@
* caller to process with ocfs_complete_local_alloc_recovery
*/
int ocfs_begin_local_alloc_recovery(ocfs_super *osb,
- int slot_num,
+ int node_num,
ocfs2_dinode **alloc_copy)
{
int status = 0;
@@ -356,13 +359,13 @@
struct inode *inode = NULL;
ocfs2_dinode *alloc;
- LOG_ENTRY_ARGS("(slot_num = %d)\n", slot_num);
+ LOG_ENTRY_ARGS("(node_num = %d)\n", node_num);
*alloc_copy = NULL;
inode = ocfs_get_system_file_inode(osb,
LOCAL_ALLOC_SYSTEM_INODE,
- slot_num);
+ node_num);
if (!inode) {
LOG_ERROR_STATUS(status=-EINVAL);
goto bail;
@@ -402,7 +405,7 @@
iput(inode);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_begin_local_alloc_recovery */
/*
@@ -438,12 +441,15 @@
}
ocfs_handle_add_inode(handle, main_bm_inode);
- status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &main_bm_bh, main_bm_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, main_bm_inode);
handle = ocfs_start_trans(osb, handle, OCFS_WINDOW_MOVE_CREDITS);
if (!handle) {
@@ -501,7 +507,7 @@
local_alloc_inode =
ocfs_get_system_file_inode(osb,
LOCAL_ALLOC_SYSTEM_INODE,
- osb->slot_num);
+ osb->node_num);
if (!local_alloc_inode) {
status = -ENOENT;
LOG_ERROR_STATUS(status);
@@ -529,11 +535,11 @@
alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
- OCFS_ASSERT(le32_to_cpu(alloc->id1.bitmap1.i_used) ==
+ OCFS_ASSERT(le32_to_cpu(alloc->id1.bitmap1.bryce_i_used) ==
ocfs_local_alloc_count_bits(alloc));
- free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
- le32_to_cpu(alloc->id1.bitmap1.i_used);
+ free_bits = le32_to_cpu(alloc->id1.bitmap1.bryce_i_total) -
+ le32_to_cpu(alloc->id1.bitmap1.bryce_i_used);
if (bits_wanted > free_bits) {
/* uhoh, window change time. */
status =
@@ -555,7 +561,7 @@
iput(local_alloc_inode);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
int ocfs_claim_local_alloc_bits(ocfs_super *osb,
@@ -587,7 +593,7 @@
}
bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
- *bit_off = LOCAL_ALLOC(alloc)->la_bm_off + start;
+ *bit_off = le32_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_bm_off) + start;
/* local alloc is always contiguous by nature -- we never
* delete bits from it! */
*num_bits = bits_wanted;
@@ -603,8 +609,7 @@
while(bits_wanted--)
ocfs2_set_bit(start++, bitmap);
- alloc->id1.bitmap1.i_used = cpu_to_le32(*num_bits +
- le32_to_cpu(alloc->id1.bitmap1.i_used));
+ cpu_to_le32(alloc->id1.bitmap1.bryce_i_used) += *num_bits;
status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
if (status < 0) {
@@ -615,26 +620,30 @@
status = 0;
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
* ocfs_local_alloc_count_bits
*/
-static u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc)
+static __u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc)
{
int i;
- u8 *buffer;
- u32 count = 0;
+ __u8 tmp;
+ __u8 *buffer;
+ __u32 count = 0;
LOG_ENTRY();
buffer = LOCAL_ALLOC(alloc)->la_bitmap;
- for (i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++)
- count += hweight8(buffer[i]);
+ for (i = 0; i < le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size); i++) {
+ memcpy(&tmp, buffer, 1);
+ count+= BITCOUNT(tmp);
+ buffer++;
+ }
LOG_EXIT_ULONG ((unsigned long)count);
- return count;
+ return(count);
} /* ocfs_local_alloc_count_bits */
/*
@@ -642,14 +651,14 @@
*/
static int ocfs_local_alloc_find_clear_bits(ocfs_super *osb,
ocfs2_dinode *alloc,
- u32 numbits)
+ __u32 numbits)
{
int numfound, bitoff, left, startoff, lastzero;
void *bitmap = NULL;
LOG_ENTRY_ARGS("(numbits wanted = %u)\n", numbits);
- if (!alloc->id1.bitmap1.i_total) {
+ if (!le32_to_cpu(alloc->id1.bitmap1.bryce_i_total)) {
LOG_TRACE_STR("No bits in my window!");
bitoff = -1;
goto bail;
@@ -659,7 +668,7 @@
numfound = bitoff = startoff = 0;
lastzero = -1;
- left = le32_to_cpu(alloc->id1.bitmap1.i_total);
+ left = le32_to_cpu(alloc->id1.bitmap1.bryce_i_total);
while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
if (bitoff == left) {
/* LOG_TRACE_ARGS("bitoff (%d) == left", bitoff); */
@@ -696,7 +705,7 @@
bail:
LOG_EXIT_STATUS(bitoff);
- return bitoff;
+ return(bitoff);
} /* ocfs_local_find_alloc_clear_bits */
/*
@@ -707,13 +716,14 @@
int i;
LOG_ENTRY();
- alloc->id1.bitmap1.i_total = 0;
- alloc->id1.bitmap1.i_used = 0;
- LOCAL_ALLOC(alloc)->la_bm_off = 0;
- for(i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++)
+ cpu_to_le32(alloc->id1.bitmap1.bryce_i_total) = 0;
+ cpu_to_le32(alloc->id1.bitmap1.bryce_i_used) = 0;
+ cpu_to_le32(LOCAL_ALLOC(alloc)->bryce_la_bm_off) = 0;
+ for(i = 0; i < le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size); i++)
LOCAL_ALLOC(alloc)->la_bitmap[i] = 0;
LOG_EXIT();
+ return;
} /* ocfs_clear_local_alloc */
#if 0
@@ -756,25 +766,25 @@
void *bitmap;
LOG_ENTRY_ARGS("total = %u, COUNT = %u, used = %u\n",
- le32_to_cpu(alloc->id1.bitmap1.i_total),
+ le32_to_cpu(alloc->id1.bitmap1.bryce_i_total),
ocfs_local_alloc_count_bits(alloc),
- le32_to_cpu(alloc->id1.bitmap1.i_used));
+ le32_to_cpu(alloc->id1.bitmap1.bryce_i_used));
- if (!alloc->id1.bitmap1.i_total) {
+ if (!le32_to_cpu(alloc->id1.bitmap1.bryce_i_total)) {
LOG_TRACE_STR("nothing to sync!");
goto bail;
}
- if (alloc->id1.bitmap1.i_used == alloc->id1.bitmap1.i_total) {
+ if (le32_to_cpu(alloc->id1.bitmap1.bryce_i_used) == le32_to_cpu(alloc->id1.bitmap1.bryce_i_total)) {
LOG_TRACE_STR("all bits were taken!");
goto bail;
}
la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
- LOCAL_ALLOC(alloc)->la_bm_off);
+ le32_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_bm_off));
bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
start = count = bit_off = 0;
- left = le32_to_cpu(alloc->id1.bitmap1.i_total);
+ left = le32_to_cpu(alloc->id1.bitmap1.bryce_i_total);
while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
!= -1) {
@@ -810,7 +820,7 @@
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_sync_local_to_main */
static int ocfs_local_alloc_reserve_for_window(ocfs_super *osb,
@@ -849,7 +859,7 @@
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -868,7 +878,7 @@
LOG_ENTRY();
alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
- if (alloc->id1.bitmap1.i_total)
+ if (le32_to_cpu(alloc->id1.bitmap1.bryce_i_total))
LOG_TRACE_STR("asking me to alloc a new window over a"
" non-empty one");
@@ -889,24 +899,24 @@
alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
- LOCAL_ALLOC(alloc)->la_bm_off = cluster_off;
- alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
+ cpu_to_le32(LOCAL_ALLOC(alloc)->bryce_la_bm_off) = cluster_off;
+ cpu_to_le32(alloc->id1.bitmap1.bryce_i_total) = le32_to_cpu(cluster_count);
/* just in case... In the future when we find space ourselves,
* we don't have to get all contiguous -- but we'll have to
* set all previously used bits in bitmap and update
* la_bits_set before setting the bits in the main bitmap. */
- alloc->id1.bitmap1.i_used = 0;
+ cpu_to_le32(alloc->id1.bitmap1.bryce_i_used) = 0;
memset(LOCAL_ALLOC(alloc)->la_bitmap, 0,
- LOCAL_ALLOC(alloc)->la_size);
+ le16_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_size));
LOG_TRACE_STR("New window allocated:");
LOG_TRACE_ARGS("window la_bm_off = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_off);
- LOG_TRACE_ARGS("window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
+ le32_to_cpu(LOCAL_ALLOC(alloc)->bryce_la_bm_off));
+ LOG_TRACE_ARGS("window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.bryce_i_total));
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_local_alloc_new_window */
/* Note that we do *NOT* lock the local alloc inode here as
@@ -1015,6 +1025,6 @@
ocfs_free_alloc_context(ac);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
Modified: branches/endian/src/namei.c
===================================================================
--- branches/endian/src/namei.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/namei.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -50,14 +50,14 @@
#include "alloc.h"
#include "dcache.h"
#include "dir.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
+#include "lockres.h"
#include "namei.h"
#include "suballoc.h"
-#include "symlink.h"
#include "util.h"
#include "vote.h"
@@ -101,8 +101,10 @@
static int ocfs_double_lock(ocfs_super *osb,
ocfs_journal_handle *handle,
+ __u32 type1, __u32 flags1,
struct buffer_head **bh1,
struct inode *inode1,
+ __u32 type2, __u32 flags2,
struct buffer_head **bh2,
struct inode *inode2);
@@ -127,9 +129,9 @@
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh)
{
- return __ocfs_add_entry(handle, dentry->d_parent->d_inode,
+ return(__ocfs_add_entry(handle, dentry->d_parent->d_inode,
dentry->d_name.name, dentry->d_name.len,
- inode, blkno, parent_fe_bh, insert_bh);
+ inode, blkno, parent_fe_bh, insert_bh));
}
/*
@@ -154,6 +156,7 @@
dentry->d_name.len, dentry->d_name.name);
if (dentry->d_name.len > OCFS2_MAX_FILENAME_LENGTH) {
+ LOG_ERROR_STR ("name too long");
ret = ERR_PTR (-ENAMETOOLONG);
goto bail;
}
@@ -220,15 +223,15 @@
memset(new_bh->b_data, 0, osb->sb->s_blocksize);
de = (struct ocfs2_dir_entry *) new_bh->b_data;
- de->inode = cpu_to_le64(OCFS_I(inode)->ip_blkno);
+ de->bryce_inode = cpu_to_le64(OCFS_I(inode)->ip_blkno);
de->name_len = 1;
- de->rec_len =
+ de->bryce_rec_len =
cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ocfs_set_de_type(de, S_IFDIR);
- de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
- de->inode = cpu_to_le64(OCFS_I(parent)->ip_blkno);
- de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
+ de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->bryce_rec_len));
+ de->bryce_inode = cpu_to_le64(OCFS_I(parent)->ip_blkno);
+ de->bryce_rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
OCFS2_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
@@ -240,7 +243,7 @@
goto bail;
}
- i_size_write(inode, inode->i_sb->s_blocksize);
+ inode->i_size = inode->i_sb->s_blocksize;
inode->i_nlink = 2;
inode->i_blocks = 1;
status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
@@ -255,7 +258,7 @@
brelse(new_bh);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
static int ocfs_mknod(struct inode *dir, struct dentry *dentry,
@@ -263,7 +266,7 @@
{
int status = 0;
struct buffer_head *parent_fe_bh = NULL;
- u64 file_off;
+ __u64 file_off;
ocfs_journal_handle *handle = NULL;
ocfs_super *osb;
ocfs2_dinode *fe = NULL;
@@ -279,8 +282,21 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL MKNOD... creating the file here!\n");
+ LOG_ERROR_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
+ (unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
+ }
+#endif
+
/* get our super block */
osb = OCFS_SB(dir->i_sb);
+ if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
+ LOG_ERROR_STR ("Volume has been shutdown");
+ status = -EACCES;
+ goto leave;
+ }
if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
printk("inode %llu has i_nlink of %u\n",
@@ -307,15 +323,18 @@
goto leave;
}
- status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
+ /* lock the parent directory */
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0,
+ &parent_fe_bh, dir);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto leave;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
dirfe = (ocfs2_dinode *) parent_fe_bh->b_data;
- if (!dirfe->i_links_count) {
+ if (!le16_to_cpu(dirfe->bryce_i_links_count)) {
/* can't make a file in a deleted directory. */
status = -ENOENT;
goto leave;
@@ -369,17 +388,16 @@
LOG_ERROR_ARGS("populate inode failed! bh->b_blocknr=%llu, "
"i_blkno=%llu, i_ino=%lu\n",
(unsigned long long)new_fe_bh->b_blocknr,
- fe->i_blkno, inode->i_ino);
+ le64_to_cpu(fe->bryce_i_blkno), inode->i_ino);
BUG();
}
+ file_off = le64_to_cpu(fe->bryce_i_blkno) << dir->i_sb->s_blocksize_bits;
ocfs_inode_set_new(osb, inode);
- status = ocfs2_create_new_inode_locks(inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
- file_off = fe->i_blkno << dir->i_sb->s_blocksize_bits;
+ ocfs_init_lockres(osb, inode);
+ status = ocfs_update_lockres(osb, new_fe_bh, inode, 0);
if (S_ISDIR (mode)) {
status = ocfs_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac);
@@ -394,7 +412,7 @@
LOG_ERROR_STATUS(status);
goto leave;
}
- dirfe->i_links_count++;
+ cpu_to_le16(dirfe->bryce_i_links_count)++;
status = ocfs_journal_dirty(handle, parent_fe_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -403,7 +421,7 @@
dir->i_nlink++;
}
- status = ocfs_add_entry(handle, dentry, inode, fe->i_blkno,
+ status = ocfs_add_entry(handle, dentry, inode, le64_to_cpu(fe->bryce_i_blkno),
parent_fe_bh, de_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -444,7 +462,7 @@
LOG_EXIT_STATUS(status);
LOG_CLEAR_CONTEXT();
- return status;
+ return(status);
} /* ocfs_mknod */
/*
@@ -498,44 +516,53 @@
fe = (ocfs2_dinode *) (*new_fe_bh)->b_data;
memset(fe, 0, osb->sb->s_blocksize);
inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
+
+#ifdef PURE_EVIL
+ if (evil_filename_check(EVIL_DENTRY, dentry)) {
+ LOG_ERROR_ARGS("EVIL MKNOD_LOCKED: i_ino to watch is %lu\n", inode->i_ino);
+ }
+#endif
spin_lock(&osb->s_next_gen_lock);
inode->i_generation = osb->s_next_generation++;
spin_unlock(&osb->s_next_gen_lock);
- fe->i_generation = cpu_to_le32(inode->i_generation);
- fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
- fe->i_blkno = fe_blkno;
- fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
- fe->i_suballoc_node = cpu_to_le16(osb->slot_num);
- fe->i_uid = current->fsuid;
+ cpu_to_le32(fe->bryce_i_generation) = le32_to_cpu(inode->i_generation);
+ cpu_to_le64(fe->bryce_i_blkno) = fe_blkno;
+ cpu_to_le16(fe->bryce_i_suballoc_bit) = suballoc_bit;
+ cpu_to_le16(fe->bryce_i_suballoc_node) = osb->node_num;
+ cpu_to_le32(fe->bryce_i_uid) = current->fsuid;
if (dir->i_mode & S_ISGID) {
- fe->i_gid = dir->i_gid;
+ cpu_to_le32(fe->bryce_i_gid) = dir->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- fe->i_gid = current->fsgid;
- fe->i_mode = mode;
+ cpu_to_le32(fe->bryce_i_gid) = current->fsgid;
+ cpu_to_le16(fe->bryce_i_mode) = mode;
if (S_ISCHR (mode) || S_ISBLK (mode))
- fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
+ cpu_to_le64(fe->id1.dev1.bryce_i_rdev) = huge_encode_dev(dev);
if (S_ISLNK (mode) || S_ISDIR (mode) || S_ISREG (mode))
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
if (S_ISDIR (mode))
- fe->i_links_count = 2;
+ cpu_to_le16(fe->bryce_i_links_count) = 2;
else
- fe->i_links_count = 1;
+ cpu_to_le16(fe->bryce_i_links_count) = 1;
- fe->i_last_eb_blk = 0;
+ cpu_to_le64(fe->bryce_i_last_eb_blk) = 0;
strcpy (fe->i_signature, OCFS2_INODE_SIGNATURE);
- fe->i_flags |= OCFS2_VALID_FL;
- fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
- fe->i_dtime = 0;
+ cpu_to_le32(fe->bryce_i_flags) |= OCFS2_VALID_FL;
+ cpu_to_le16(DISK_LOCK(fe)->bryce_dl_master) = osb->node_num;
+ DISK_LOCK(fe)->dl_level = OCFS_LKM_EXMODE;
+ cpu_to_le64(fe->bryce_i_atime) = OCFS_CURRENT_TIME;
+ cpu_to_le64(fe->bryce_i_ctime) = OCFS_CURRENT_TIME;
+ cpu_to_le64(fe->bryce_i_mtime) = OCFS_CURRENT_TIME;
+ cpu_to_le64(fe->bryce_i_dtime) = 0;
fel = &fe->id2.i_list;
- fel->l_tree_depth = 0;
- fel->l_next_free_rec = 0;
- fel->l_count = ocfs2_extent_recs_per_inode(osb->sb);
+ cpu_to_le16(fel->bryce_l_tree_depth) = 0;
+ cpu_to_le16(fel->bryce_l_next_free_rec) = 0;
+ cpu_to_le16(fel->bryce_l_count) = ocfs2_extent_recs_per_inode(osb->sb);
fe = NULL;
@@ -636,12 +663,15 @@
goto bail;
}
- err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
+ /* lock the parent directory */
+ err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0,
+ &parent_fe_bh, dir);
if (err < 0) {
if (err != -EINTR)
LOG_ERROR_STATUS (err);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
err = ocfs_prepare_dir_for_insert(osb, dir, parent_fe_bh,
dentry->d_name.name,
@@ -651,15 +681,17 @@
goto bail;
}
- err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
+ err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0, &fe_bh, inode);
if (err < 0) {
if (err != -EINTR)
LOG_ERROR_STATUS (err);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN,
+ inode);
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (fe->i_links_count >= OCFS2_LINK_MAX) {
+ if (le16_to_cpu(fe->bryce_i_links_count) >= OCFS2_LINK_MAX) {
err = -EMLINK;
goto bail;
}
@@ -677,14 +709,14 @@
goto bail;
}
- fe->i_links_count++;
- fe->i_ctime = OCFS_CURRENT_TIME;
- inode->i_nlink = fe->i_links_count;
- OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
+ cpu_to_le16(fe->bryce_i_links_count)++;
+ cpu_to_le64(fe->bryce_i_ctime) = OCFS_CURRENT_TIME;
+ inode->i_nlink = le16_to_cpu(fe->bryce_i_links_count);
+ OCFS_SET_INODE_TIME(inode, i_ctime, le64_to_cpu(fe->bryce_i_ctime));
err = ocfs_journal_dirty(handle, fe_bh);
if (err < 0) {
- fe->i_links_count--;
+ cpu_to_le16(fe->bryce_i_links_count)--;
inode->i_nlink--;
LOG_ERROR_STATUS(err);
goto bail;
@@ -693,7 +725,7 @@
err = ocfs_add_entry(handle, dentry, inode, OCFS_I(inode)->ip_blkno,
parent_fe_bh, de_bh);
if (err) {
- fe->i_links_count--;
+ cpu_to_le16(fe->bryce_i_links_count)--;
inode->i_nlink--;
LOG_ERROR_STATUS(err);
goto bail;
@@ -760,11 +792,13 @@
goto leave;
}
- status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &parent_node_bh, dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
dentry->d_name.len, &blkno,
@@ -778,20 +812,16 @@
if (blkno != OCFS_I(inode)->ip_blkno)
BUG();
- status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
+ FLAG_RELEASE_DENTRY, &fe_bh, inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto leave;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_RELEASE_DENTRY,
+ inode);
- status = ocfs2_request_unlink_vote(inode);
- if (status < 0) {
- /* This vote should succeed under all normal circumstances. */
- LOG_ERROR_STATUS(status);
- goto leave;
- }
-
if (S_ISDIR (inode->i_mode)) {
if (!ocfs_empty_dir(inode)) {
status = -ENOTEMPTY;
@@ -826,14 +856,14 @@
}
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (fe->i_links_count != inode->i_nlink) {
+ if (le16_to_cpu(fe->bryce_i_links_count) != inode->i_nlink) {
printk("ocfs_unlink: hmm, inode has nlink = %u, fe has "
"link_cnt = %u. Setting inode from fe.\n",
- inode->i_nlink, fe->i_links_count);
- inode->i_nlink = fe->i_links_count;
+ inode->i_nlink, le16_to_cpu(fe->bryce_i_links_count));
+ inode->i_nlink = le16_to_cpu(fe->bryce_i_links_count);
}
- if (S_ISDIR(inode->i_mode) || (fe->i_links_count == 1)) {
+ if (S_ISDIR(inode->i_mode) || (le16_to_cpu(fe->bryce_i_links_count) == 1)) {
status = ocfs_orphan_add(osb, handle, inode, fe, orphan_name,
orphan_entry_bh);
if (status < 0) {
@@ -850,10 +880,10 @@
}
if (S_ISDIR (inode->i_mode)) {
- fe->i_links_count = 0;
+ cpu_to_le16(fe->bryce_i_links_count) = 0;
inode->i_nlink = 0;
} else {
- fe->i_links_count--;
+ cpu_to_le16(fe->bryce_i_links_count)--;
inode->i_nlink--;
}
@@ -913,14 +943,16 @@
*/
static int ocfs_double_lock(ocfs_super *osb,
ocfs_journal_handle *handle,
+ __u32 type1, __u32 flags1,
struct buffer_head **bh1,
struct inode *inode1,
+ __u32 type2, __u32 flags2,
struct buffer_head **bh2,
struct inode *inode2)
{
- int status;
- ocfs_inode_private *oip1 = OCFS_I(inode1);
- ocfs_inode_private *oip2 = OCFS_I(inode2);
+ int status = 0;
+ __u64 tmpid, id1, id2;
+ __u32 tmptype, tmpflags;
struct buffer_head **tmpbh;
struct inode *tmpinode;
@@ -930,16 +962,31 @@
OCFS_ASSERT(handle);
+ id1 = OCFS_I(inode1)->ip_blkno;
+ id2 = OCFS_I(inode2)->ip_blkno;
+
if (*bh1)
*bh1 = NULL;
if (*bh2)
*bh2 = NULL;
/* we always want to lock the one with the lower lockid first. */
- if (oip1->ip_blkno != oip2->ip_blkno) {
- if (oip1->ip_blkno < oip2->ip_blkno) {
+ if (id1 != id2) {
+ if (id1 < id2) {
/* switch id1 and id2 around */
LOG_TRACE_STR("switching them around...");
+ tmpid = id2;
+ id2 = id1;
+ id1 = tmpid;
+
+ tmptype = type2;
+ type2 = type1;
+ type1 = tmptype;
+
+ tmpflags = flags2;
+ flags2 = flags1;
+ flags1 = tmpflags;
+
tmpbh = bh2;
bh2 = bh1;
bh1 = tmpbh;
@@ -949,26 +996,29 @@
inode1 = tmpinode;
}
/* lock id2 */
- status = ocfs2_meta_lock(inode2, handle, bh2, 1);
+ status = ocfs_acquire_lock(osb, type2, flags2, bh2, inode2);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, type2, flags2, inode2);
}
/* lock id1 */
- status = ocfs2_meta_lock(inode1, handle, bh1, 1);
+ status = ocfs_acquire_lock(osb, type1, flags1,
+ bh1, inode1);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, type1, flags1, inode1);
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_double_lock */
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) ((char *) buffer + \
- le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
+ le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->bryce_rec_len)))->bryce_inode
/*
* ocfs_rename()
@@ -996,6 +1046,7 @@
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
+ struct buffer_head *lockbh = NULL;
LOG_SET_CONTEXT(RENAME);
@@ -1007,7 +1058,7 @@
osb = OCFS_SB(old_dir->i_sb);
if (new_inode) {
- if (!igrab(new_inode))
+ if (ocfs_inc_icount(new_inode) < 0)
BUG();
}
@@ -1027,7 +1078,11 @@
/* if old and new are the same, this'll just do one lock. */
status = ocfs_double_lock(osb, handle,
+ OCFS_LKM_EXMODE,
+ 0,
&old_dir_bh, old_dir,
+ OCFS_LKM_EXMODE,
+ 0,
&new_dir_bh, new_dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1051,17 +1106,20 @@
/* Directories actually require metadata updates to
* the directory info so we can't get away with not
* doing node locking on it. */
- status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME,
+ &lockbh, old_inode);
+ if (lockbh) {
+ brelse(lockbh);
+ lockbh = NULL;
}
-
- status = ocfs2_request_rename_vote(old_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME,
+ old_inode);
status = -EIO;
old_inode_de_bh = ocfs_bread (old_inode, 0, &status, 0);
@@ -1079,7 +1137,7 @@
} else {
/* Ah, the simple case - we're a file so just send a
* message. */
- status = ocfs2_request_rename_vote(old_inode);
+ status = ocfs_notify_on_rename(osb, old_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1099,7 +1157,7 @@
* and merrily kill the link to whatever was created under the
* same name. Goodbye sticky bit ;-<
*/
- if (le64_to_cpu(old_de->inode) != OCFS_I(old_inode)->ip_blkno)
+ if (le64_to_cpu(old_de->bryce_inode) != OCFS_I(old_inode)->ip_blkno)
goto bail;
/* check if the target already exists (in which case we need
@@ -1130,18 +1188,16 @@
if (newfe_blkno != OCFS_I(new_inode)->ip_blkno)
BUG();
- status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ FLAG_RELEASE_DENTRY, &newfe_bh,
+ new_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ FLAG_RELEASE_DENTRY, new_inode);
- status = ocfs2_request_unlink_vote(new_inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
newfe = (ocfs2_dinode *) newfe_bh->b_data;
LOG_TRACE_ARGS("aha rename over existing... new_de=%p "
@@ -1191,7 +1247,7 @@
goto bail;
}
- if (S_ISDIR(new_inode->i_mode) || (newfe->i_links_count == 1)){
+ if (S_ISDIR(new_inode->i_mode) || (le16_to_cpu(newfe->bryce_i_links_count) == 1)){
status = ocfs_orphan_add(osb, handle, new_inode,
newfe, orphan_name,
orphan_entry_bh);
@@ -1208,7 +1264,7 @@
LOG_ERROR_STATUS (status);
goto bail;
}
- new_de->inode =
+ new_de->bryce_inode =
le64_to_cpu(OCFS_I(old_inode)->ip_blkno);
new_de->file_type = old_de->file_type;
new_dir->i_version++;
@@ -1219,9 +1275,9 @@
}
if (S_ISDIR (new_inode->i_mode))
- newfe->i_links_count = 0;
+ cpu_to_le16(newfe->bryce_i_links_count) = 0;
else
- newfe->i_links_count--;
+ cpu_to_le16(newfe->bryce_i_links_count)--;
status = ocfs_journal_dirty(handle, newfe_bh);
if (status < 0) {
@@ -1282,7 +1338,7 @@
new_dir_bh,
OCFS_JOURNAL_ACCESS_WRITE);
fe = (ocfs2_dinode *) new_dir_bh->b_data;
- fe->i_links_count = new_dir->i_nlink;
+ cpu_to_le16(fe->bryce_i_links_count) = new_dir->i_nlink;
status = ocfs_journal_dirty(handle, new_dir_bh);
}
}
@@ -1297,7 +1353,7 @@
status = ocfs_journal_access(handle, old_dir,
old_dir_bh, OCFS_JOURNAL_ACCESS_WRITE);
fe = (ocfs2_dinode *) old_dir_bh->b_data;
- fe->i_links_count = old_dir->i_nlink;
+ cpu_to_le16(fe->bryce_i_links_count) = old_dir->i_nlink;
status = ocfs_journal_dirty(handle, old_dir_bh);
}
}
@@ -1356,13 +1412,13 @@
int p_blocks;
int virtual, blocks, status, i, bytes_left;
- bytes_left = i_size_read(inode) + 1;
+ bytes_left = inode->i_size + 1;
/* we can't trust i_blocks because we're actually going to
* write i_size + 1 bytes. */
blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
LOG_ENTRY_ARGS("i_blocks = %lu, i_size = %llu, blocks = %d\n",
- inode->i_blocks, i_size_read(inode), blocks);
+ inode->i_blocks, inode->i_size, blocks);
/* Sanity check -- make sure we're going to fit. */
if (bytes_left > ocfs2_clusters_to_bytes(sb, OCFS_I(inode)->ip_clusters)) {
@@ -1371,7 +1427,7 @@
goto bail;
}
- bhs = kmalloc(sizeof(struct buffer_head *) * blocks, GFP_KERNEL);
+ bhs = ocfs_malloc(sizeof(struct buffer_head *) * blocks);
if (!bhs) {
status = -ENOMEM;
LOG_ERROR_STATUS(status);
@@ -1444,7 +1500,7 @@
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_create_symlink_data */
/*
@@ -1453,17 +1509,19 @@
*/
static int ocfs_symlink (struct inode *dir, struct dentry *dentry, const char *symname)
{
- int status, l, credits;
- u64 newsize;
ocfs_super *osb = NULL;
struct inode *inode = NULL;
+ __u64 newsize;
+ int status;
struct super_block *sb;
+ int l;
struct buffer_head *new_fe_bh = NULL;
struct buffer_head *de_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
ocfs2_dinode *fe = NULL;
ocfs2_dinode *dirfe;
ocfs_journal_handle *handle = NULL;
+ int credits;
ocfs2_alloc_context *inode_ac = NULL;
ocfs2_alloc_context *data_ac = NULL;
@@ -1476,8 +1534,6 @@
sb = dir->i_sb;
osb = OCFS_SB(sb);
- l = strlen(symname) + 1;
-
inode = new_inode (sb);
if (IS_ERR (inode)) {
status = PTR_ERR(inode);
@@ -1493,6 +1549,9 @@
goto bail;
}
+ l = strlen (symname) + 1;
+ newsize = l - 1;
+
credits = ocfs_calc_symlink_credits(sb);
handle = ocfs_alloc_handle(osb);
@@ -1502,15 +1561,17 @@
}
/* lock the parent directory */
- status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &parent_fe_bh, dir);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
dirfe = (ocfs2_dinode *) parent_fe_bh->b_data;
- if (!dirfe->i_links_count) {
+ if (!le32_to_cpu(dirfe->bryce_i_links_count)) {
/* can't make a file in a deleted directory. */
status = -ENOENT;
goto bail;
@@ -1530,13 +1591,10 @@
goto bail;
}
- /* don't reserve bitmap space for fast symlinks. */
- if (l > ocfs2_fast_symlink_chars(sb)) {
- status = ocfs_reserve_clusters(osb, handle, 1, &data_ac);
- if (status < 0) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto bail;
- }
+ status = ocfs_reserve_clusters(osb, handle, 1, &data_ac);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
}
handle = ocfs_start_trans(osb, handle, credits);
@@ -1560,37 +1618,29 @@
LOG_ERROR_ARGS("populate inode failed! bh->b_blocknr=%llu, "
"i_blkno=%llu, i_ino=%lu\n",
(unsigned long long)new_fe_bh->b_blocknr,
- fe->i_blkno, inode->i_ino);
+ le64_to_cpu(fe->bryce_i_blkno), inode->i_ino);
BUG();
}
ocfs_inode_set_new(osb, inode);
- status = ocfs2_create_new_inode_locks(inode);
+ ocfs_init_lockres(osb, inode);
+ status = ocfs_update_lockres(osb, new_fe_bh, inode, 0);
if (status < 0)
LOG_ERROR_STATUS(status);
- inode->i_rdev = 0;
- newsize = l - 1;
- if (l > ocfs2_fast_symlink_chars(sb)) {
- inode->i_op = &ocfs_symlink_inode_operations;
- status = ocfs_extend_allocation(osb, inode, 1, new_fe_bh,
- handle, data_ac, NULL, NULL);
- if (status < 0) {
- if (status != -ENOSPC && status != -EINTR) {
- LOG_ERROR_ARGS ("Failed to extend file to %llu", newsize);
- LOG_ERROR_STATUS(status);
- status = -ENOSPC;
- }
- goto bail;
+ status = ocfs_extend_allocation(osb, inode, 1, new_fe_bh, handle,
+ data_ac, NULL, NULL);
+ if (status < 0) {
+ if (status != -ENOSPC && status != -EINTR) {
+ LOG_ERROR_ARGS ("Failed to extend file to %llu", newsize);
+ LOG_ERROR_STATUS(status);
+ status = -ENOSPC;
}
- i_size_write(inode, newsize);
- inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
- } else {
- inode->i_op = &ocfs_fast_symlink_inode_operations;
- memcpy((char *) fe->id2.i_symlink, symname, l);
- i_size_write(inode, newsize);
- inode->i_blocks = 0;
+ goto bail;
}
+ inode->i_rdev = 0;
+ inode->i_size = newsize;
+ inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
status = ocfs_mark_inode_dirty(handle, inode, new_fe_bh);
if (status < 0) {
@@ -1598,15 +1648,13 @@
goto bail;
}
- if (!ocfs2_inode_is_fast_symlink(inode)) {
- status = ocfs_create_symlink_data(osb, handle, inode, symname);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
+ status = ocfs_create_symlink_data(osb, handle, inode, symname);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
}
- status = ocfs_add_entry(handle, dentry, inode, fe->i_blkno,
+ status = ocfs_add_entry(handle, dentry, inode, le64_to_cpu(fe->bryce_i_blkno),
parent_fe_bh, de_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1640,7 +1688,7 @@
int ocfs_check_dir_entry (struct inode * dir, struct ocfs2_dir_entry * de, struct buffer_head * bh, unsigned long offset)
{
const char * error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
+ const int rlen = le16_to_cpu(de->bryce_rec_len);
if (rlen < OCFS2_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
@@ -1655,7 +1703,7 @@
LOG_ERROR_ARGS("bad entry in directory #%llu: %s - "
"offset=%lu, inode=%llu, rec_len=%d, name_len=%d",
OCFS_I(dir)->ip_blkno, error_msg, offset,
- le64_to_cpu(de->inode),
+ le64_to_cpu(de->bryce_inode),
rlen, de->name_len);
return error_msg == NULL ? 1 : 0;
}
@@ -1702,29 +1750,29 @@
retval = -EEXIST;
goto bail;
}
- if (((le64_to_cpu(de->inode) == 0) &&
- (le16_to_cpu(de->rec_len) >= rec_len)) ||
- (le16_to_cpu(de->rec_len) >=
+ if (((le64_to_cpu(de->bryce_inode) == 0) &&
+ (le16_to_cpu(de->bryce_rec_len) >= rec_len)) ||
+ (le16_to_cpu(de->bryce_rec_len) >=
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
status = ocfs_journal_access(handle, dir, insert_bh,
OCFS_JOURNAL_ACCESS_WRITE);
/* By now the buffer is marked for journaling */
- offset += le16_to_cpu(de->rec_len);
- if (le64_to_cpu(de->inode)) {
+ offset += le16_to_cpu(de->bryce_rec_len);
+ if (le64_to_cpu(de->bryce_inode)) {
de1 = (struct ocfs2_dir_entry *) ((char *) de +
OCFS2_DIR_REC_LEN(de->name_len));
- de1->rec_len =
- cpu_to_le16(le16_to_cpu(de->rec_len) -
+ de1->bryce_rec_len =
+ cpu_to_le16(le16_to_cpu(de->bryce_rec_len) -
OCFS2_DIR_REC_LEN(de->name_len));
- de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
+ de->bryce_rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
de = de1;
}
de->file_type = OCFS2_FT_UNKNOWN;
if (blkno) {
- de->inode = cpu_to_le64(blkno);
+ de->bryce_inode = cpu_to_le64(blkno);
ocfs_set_de_type(de, inode->i_mode);
} else
- de->inode = 0;
+ le64_to_cpu(de->bryce_inode) = 0;
de->name_len = namelen;
memcpy (de->name, name, namelen);
@@ -1734,8 +1782,8 @@
retval = 0;
goto bail;
}
- offset += le16_to_cpu(de->rec_len);
- de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
+ offset += le16_to_cpu(de->bryce_rec_len);
+ de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->bryce_rec_len));
}
/* when you think about it, the assert above should prevent us
@@ -1775,17 +1823,17 @@
goto bail;
}
if (pde)
- pde->rec_len = cpu_to_le16(le16_to_cpu(pde->rec_len) +
- le16_to_cpu(de->rec_len));
+ pde->bryce_rec_len = cpu_to_le16(le16_to_cpu(pde->bryce_rec_len) +
+ le16_to_cpu(de->bryce_rec_len));
else
- de->inode = 0;
+ cpu_to_le64(de->bryce_inode) = 0;
dir->i_version++;
status = ocfs_journal_dirty(handle, bh);
goto bail;
}
- i += le16_to_cpu(de->rec_len);
+ i += le16_to_cpu(de->bryce_rec_len);
pde = de;
- de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
+ de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->bryce_rec_len));
}
bail:
LOG_EXIT_STATUS(status);
@@ -1826,7 +1874,7 @@
goto bail;
}
/* prevent looping on a bad block */
- de_len = le16_to_cpu(de->rec_len);
+ de_len = le16_to_cpu(de->bryce_rec_len);
if (de_len <= 0) {
ret = -1;
goto bail;
@@ -1862,7 +1910,7 @@
*res_dir = NULL;
sb = dir->i_sb;
- nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
+ nblocks = dir->i_size >> sb->s_blocksize_bits;
start = OCFS_I(dir)->ip_dir_start_lookup;
if (start >= nblocks)
start = 0;
@@ -1926,7 +1974,7 @@
* search the last part of the directory before giving up.
*/
block = nblocks;
- nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
+ nblocks = dir->i_size >> sb->s_blocksize_bits;
if (block < nblocks) {
start = 0;
goto restart;
@@ -1972,7 +2020,7 @@
*retval = name;
bail:
LOG_EXIT_STATUS(namelen);
- return namelen;
+ return(namelen);
}
static int ocfs_prepare_orphan_dir(ocfs_super *osb,
@@ -2007,11 +2055,14 @@
}
ocfs_handle_add_inode(handle, orphan_dir_inode);
- status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &orphan_dir_bh, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
+ orphan_dir_inode);
status = ocfs_prepare_dir_for_insert(osb, orphan_dir_inode,
orphan_dir_bh, name, namelen,
@@ -2033,7 +2084,7 @@
brelse(orphan_dir_bh);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -2082,8 +2133,8 @@
* underneath us... */
orphan_fe = (ocfs2_dinode *) orphan_dir_bh->b_data;
if (S_ISDIR(inode->i_mode))
- orphan_fe->i_links_count++;
- orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
+ cpu_to_le16(orphan_fe->bryce_i_links_count)++;
+ orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->bryce_i_links_count);
status = ocfs_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
@@ -2099,7 +2150,7 @@
goto leave;
}
- fe->i_flags |= OCFS2_ORPHANED_FL;
+ cpu_to_le32(fe->bryce_i_flags) |= OCFS2_ORPHANED_FL;
leave:
if (orphan_dir_inode)
iput(orphan_dir_inode);
@@ -2108,7 +2159,7 @@
brelse(orphan_dir_bh);
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_orphan_add */
/* unlike orphan_add, we expect the orphan dir to already be locked here. */
@@ -2173,8 +2224,8 @@
/* do the i_nlink dance! :) */
orphan_fe = (ocfs2_dinode *) orphan_dir_bh->b_data;
if (S_ISDIR(inode->i_mode))
- orphan_fe->i_links_count--;
- orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
+ cpu_to_le16(orphan_fe->bryce_i_links_count)--;
+ orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->bryce_i_links_count);
status = ocfs_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
Modified: branches/endian/src/namei.h
===================================================================
--- branches/endian/src/namei.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/namei.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -41,7 +41,7 @@
{
if (len != de->name_len)
return 0;
- if (!de->inode)
+ if (!le64_to_cpu(de->bryce_inode))
return 0;
return !memcmp(name, de->name, len);
}
Modified: branches/endian/src/ocfs.h
===================================================================
--- branches/endian/src/ocfs.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -42,12 +42,37 @@
# include <linux/tqueue.h>
#endif
-#include <cluster/util.h>
-#include <cluster/dlmcommon.h>
-#include <cluster/nodemanager.h>
-#include <cluster/tcp.h>
-#include <cluster/dlmmod.h>
+enum
+{
+ OCFS_VOTE_REQUEST = 1,
+ OCFS_VOTE_REPLY,
+ OCFS_INFO_DISMOUNT
+};
+enum {
+ DISK_VOTE,
+ COMM_VOTE
+};
+
+enum {
+ INVALID_REQUEST, // reply with a NO vote
+ UPDATE_OIN_INODE, // update both oin and inode
+ DELETE_ACQUIRE,// delete or rename acquire request
+ DELETE_RELEASE,// delete or rename release request
+ CHANGE_MASTER, // request to change master to requestor
+ NOT_MASTER, // I am not master, retry
+ REMASTER_THIS, // remaster lock to me
+ REMASTER_REQUESTOR, // remaster lock to requestor
+ DROP_READONLY, // RO cachelock needs to convert to RW
+ READONLY, // a RW or RO cachelock, requesting RO
+ RELEASE_DENTRY,
+ TRUNCATE_PAGES // truncate page caches of a file
+};
+
+#define OCFS_MAX_DLM_PKT_SIZE 256
+#define OCFS_DLM_MAX_MSG_SIZE 256
+#define OCFS_DLM_MSG_MAGIC 0x79677083
+
/* convenience macro */
#define OCFS_ASSERT(x) do { if (!(x)) BUG(); } while (0)
@@ -60,17 +85,143 @@
} while (0)
+#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+ - (((x)>>2)&0x33333333) \
+ - (((x)>>3)&0x11111111))
+
+
#define OCFS_CURRENT_TIME ocfs_get_seconds(CURRENT_TIME)
#define OCFS_SET_INODE_TIME(i, x, y) (ocfs_get_seconds(i->x) = (y))
+
+#define MISS_COUNT_WARNING 20
+#define MISS_COUNT_EMERGENCY 40
+#define MISS_COUNT_NODE_DEAD 60
+
+/*
+** The following flag values reflect the operation to be performed
+** by ocfs_create_modify_file
+*/
+// FILEFLAG MASK
+#define FLAG_ACQUIRE_LOCK 0x00000001
+#define FLAG_RELEASE_LOCK 0x00000002
+#define FLAG_FILE_EXTEND 0x00000004
+#define FLAG_FILE_DELETE 0x00000008
+#define FLAG_FILE_RENAME 0x00000010
+#define FLAG_FILE_RECOVERY 0x00000020
+#define FLAG_FILE_UPDATE_OIN 0x00000040
+#define FLAG_RELEASE_DENTRY 0x00000080
+#define FLAG_CHANGE_MASTER 0x00000100
+#define FLAG_DIR 0x00000200
+#define FLAG_REMASTER 0x00000400
+#define FLAG_FAST_PATH_LOCK 0x00000800
+#define FLAG_TRUNCATE_PAGES 0x00001000
+#define FLAG_FILE_TRUNCATE 0x00002000
+#define FLAG_DROP_READONLY 0x00004000
+#define FLAG_READONLY 0x00008000
+#define FLAG_FILE_UNUSED01 0x00010000
+#define FLAG_FILE_UNUSED02 0x00020000
+#define FLAG_FILE_UNUSED03 0x00040000
+#define FLAG_FILE_UNUSED04 0x00080000
+#define FLAG_FILE_UNUSED05 0x00100000
+#define FLAG_FILE_UNUSED06 0x00200000
+#define FLAG_FILE_UNUSED07 0x00400000
+#define FLAG_FILE_UNUSED08 0x00800000
+#define FLAG_FILE_UNUSED09 0x01000000
+#define FLAG_FILE_UNUSED10 0x02000000
+#define FLAG_FILE_UNUSED11 0x04000000
+#define FLAG_FILE_UNUSED12 0x08000000
+#define FLAG_FILE_UNUSED13 0x10000000
+#define FLAG_FILE_UNUSED14 0x20000000
+#define FLAG_FILE_UNUSED15 0x40000000
+#define FLAG_FILE_UNUSED16 0x80000000
+
#define OCFS_MAX_OSB_ID 65536
+
+#define HEARTBEAT_METHOD_DISK (1)
+#define HEARTBEAT_METHOD_IPC (2)
+
+
+enum
+{
+ LEFT_NO_OVERLAP,
+ LEFT_ADJACENT,
+ LEFT_OVERLAP,
+ FULLY_CONTAINED,
+ FULLY_CONTAINING,
+ RIGHT_OVERLAP,
+ RIGHT_ADJACENT,
+ RIGHT_NO_OVERLAP
+};
+
+
+/*
+** Extents Defines
+*/
+
+typedef enum _ocfs_ext_flag {
+ LOCAL_EXT = 1,
+ NONLOCAL_EXT = 2
+} ocfs_ext_flag;
+
+/* The following are standard DLM lock types, of which we currently
+ * only use a couple. */
+#define OCFS_LKM_NLMODE (0) /* null lock */
+#define OCFS_LKM_CRMODE (1) /* concurrent read */
+#define OCFS_LKM_CWMODE (2) /* concurrent write */
+#define OCFS_LKM_PRMODE (3) /* protected read */
+#define OCFS_LKM_PWMODE (4) /* protected write */
+#define OCFS_LKM_EXMODE (5) /* exclusive */
+
#define OCFS_INVALID_NODE_NUM -1
+/* lockres->lock_state bits */
+enum {
+ LOCK_STATE_READONLY,
+ LOCK_STATE_READONLY_DROPPING,
+ LOCK_STATE_BLOCK_EXCLUSIVE,
+ LOCK_STATE_BLOCK_READONLY
+};
+
+enum {
+ NO_LOCK=0,
+ READ_LOCK,
+ WRITE_LOCK
+};
+
+
+
+/* osb->osb_flags flags */
+#define OCFS_OSB_FLAGS_BEING_DISMOUNTED (0x00000004)
+#define OCFS_OSB_FLAGS_SHUTDOWN (0x00000008)
+#define OCFS_OSB_FLAGS_INITIALIZED (0x00000020)
+
/* OcfsGlobalCtxt.flags flags */
#define OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED (0x00000001)
#define OCFS_FLAG_MEM_LISTS_INITIALIZED (0x00000002)
+#define OCFS_FLAG_SHUTDOWN_VOL_THREAD (0x00000004)
+/*
+** Information on Publish sector of each node
+*/
+#define DISK_HBEAT_COMM_ON 20 /* in the order of 5 secs */
+#define DISK_HBEAT_NO_COMM 4 /* in the order of 1 sec */
+#define DISK_HBEAT_INVALID 0 /* in the order of 100ms */
+
+
+/*
+** Information on Vote sector of each node
+*/
+// VOTEFLAG MASK
+#define FLAG_VOTE_NODE 0x1
+#define FLAG_VOTE_OIN_UPDATED 0x2
+#define FLAG_VOTE_OIN_ALREADY_INUSE 0x4
+#define FLAG_VOTE_UPDATE_RETRY 0x8
+#define FLAG_VOTE_FILE_DEL 0x10
+
+
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | \
sigmask(SIGINT) | sigmask(SIGQUIT))
@@ -78,12 +229,21 @@
#define OCFS_LINUX_MAX_FILE_SIZE 9223372036854775807LL
+#define OCFS_VOLCFG_LOCK_ITERATE (HZ/10) /* in jiffies */
+#define OCFS_VOLCFG_LOCK_TIME 1000 /* in ms */
+#define OCFS_VOLCFG_HDR_SECTORS 2 /* in sectors */
+#define OCFS_VOLCFG_NEWCFG_SECTORS 4 /* in sectors */
+
+#define OCFS_NM_HEARTBEAT_TIME 500 /* in ms */
+#define OCFS_HEARTBEAT_INIT 10 /* number of NM iterations to stabilize the publish map */
+
#ifndef O_DIRECT
#warning this depends on the architecture!
#define O_DIRECT 040000
#endif
+/* sm - ocfs 1.0 fails to set fe->sig for dirs */
#define IS_VALID_FILE_ENTRY(ptr) \
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
#define IS_VALID_EXTENT_BLOCK(ptr) \
@@ -92,9 +252,27 @@
(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
/*
+** Macros
+*/
+#define OCFS_SET_FLAG(flag, value) ((flag) |= (value))
+#define OCFS_CLEAR_FLAG(flag, value) ((flag) &= ~(value))
+
+
+/*
** Structures...
*/
+
+static inline void *ocfs_malloc(size_t size)
+{
+ void *p = kmalloc(size, GFP_KERNEL);
+ if (p)
+ memset(p, 0, size);
+ return p;
+}
+
+
+
#define OCFS_NAME "OCFS2"
/* ioctl commands */
@@ -104,21 +282,51 @@
#define OCFS_SB(sb) ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
#define OCFS2_SB(sb) ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
+#define OCFS_IPC_DEFAULT_PORT 7001
+
+
+#define OCFS_IPC_DLM_VERSION 0x0201
+
+
+/* =========================================================== */
+
/* This totally sucks that we have to include these here
* FIXME: Make them seperately includable. */
#include "ocfs2_fs.h"
+#include "ocfs2_disk_dlm.h"
+typedef struct _BARF_BARF_BARF
+{
+ char node_name[MAX_NODE_NAME_LENGTH];
+ ocfs_guid guid;
+ ocfs_ipc_config_info ipc_config;
+}
+BARF_BARF_BARF;
+
+
typedef struct _ocfs_super ocfs_super;
+typedef struct _ocfs_lock_res ocfs_lock_res;
/* this limits us to 256 nodes
* if we need more, we can do a kmalloc for the map */
#define OCFS_NODE_MAP_MAX_NODES 256
typedef struct _ocfs_node_map
{
- u16 num_nodes;
+ __u16 num_nodes;
unsigned long map[BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES)];
} ocfs_node_map;
+struct _ocfs_lock_res
+{
+ __s16 master_node_num; /* Master Node */
+ __u32 lock_holders;
+ __u32 uncommitted_holders;
+ __u8 lock_type;
+ struct rw_semaphore lock;
+ unsigned long readonly_state;
+ ocfs_node_map readonly_map;
+};
+
struct _ocfs_journal_handle;
/* I hate our includes */
@@ -127,65 +335,6 @@
struct rb_root em_extents;
};
-enum ocfs2_ast_action {
- OCFS2_AST_INVALID = 0,
- OCFS2_AST_ATTACH,
- OCFS2_AST_CONVERT,
- OCFS2_AST_DOWNCONVERT,
-};
-
-/* actions for an unlockast function to take. */
-enum ocfs2_unlock_action {
- OCFS2_UNLOCK_INVALID = 0,
- OCFS2_UNLOCK_CANCEL_CONVERT,
- OCFS2_UNLOCK_DROP_LOCK,
-};
-
-enum ocfs2_lock_type {
- OCFS_TYPE_META = 0,
- OCFS_TYPE_DATA,
- OCFS_TYPE_SUPER,
- OCFS_NUM_LOCK_TYPES
-};
-
-/* ocfs2_lock_res->l_flags flags. */
-#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
- * the lvb */
-#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
- * dlm_lock */
-#define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to
- * downconvert*/
-#define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */
-#define OCFS2_LOCK_NEEDS_REFRESH (0x00000010)
-#define OCFS2_LOCK_REFRESHING (0x00000020)
-
-struct ocfs2_lock_res_ops;
-
-typedef struct _ocfs2_lock_res {
- void *l_priv;
- struct ocfs2_lock_res_ops *l_ops;
- spinlock_t l_lock;
-
- struct list_head l_blocked_list;
-
- enum ocfs2_lock_type l_type;
- int l_flags;
- char *l_name;
- int l_level;
- unsigned int l_ro_holders;
- unsigned int l_ex_holders;
- dlm_lockstatus l_lksb;
- u32 l_local_seq;
-
- /* used from AST/BAST funcs. */
- enum ocfs2_ast_action l_action;
- enum ocfs2_unlock_action l_unlock_action;
- int l_requested;
- int l_blocking;
-
- wait_queue_head_t l_event;
-} ocfs2_lock_res;
-
/* OCFS2 Inode Private Data */
typedef struct _ocfs_inode_private
{
@@ -195,20 +344,19 @@
u64 ip_blkno;
- ocfs2_lock_res ip_meta_lockres;
- ocfs2_lock_res ip_data_lockres;
-
/* protects allocation changes on this inode. */
struct rw_semaphore ip_alloc_sem;
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
- u32 ip_open_cnt;
+ __u32 ip_open_cnt;
u32 ip_clusters;
u64 ip_mmu_private;
+ __u32 ip_open_flags;
struct ocfs2_extent_map ip_map;
- struct list_head ip_io_markers;
+ atomic_t ip_needs_verification;
+
struct semaphore ip_io_sem;
/* Used by the journalling code to attach an inode to a
@@ -218,21 +366,40 @@
struct list_head ip_handle_list;
struct _ocfs_journal_handle *ip_handle;
+ /* ip_node_extend_sem locks out extends on behalf of other nodes. */
+ struct rw_semaphore ip_node_extend_sem;
+
+ struct list_head ip_recovery_list;/* protected by recovery_list_sem */
+ __s32 ip_num_extends; /* protected by oin_num_ext_lock */
+
atomic_t ip_clean_buffer_seq;
- u32 ip_flags; /* see below */
+ __u32 ip_flags; /* see below */
/* protected by recovery_lock. */
struct inode *ip_next_orphan;
- u32 ip_dir_start_lookup;
+ ocfs_lock_res ip_lockres;
+ __u32 ip_dir_start_lookup;
- /* next two are protected by trans_inc_lock */
- /* which transaction were we created on? Zero if none. */
+ /* ip_pending_locks and ip_j_inode are protected by the
+ * journals cmt_lock.
+ * ip_pending_locks: disk locks for this inode which have to be
+ * released once their transaction checkpoints
+ * ip_j_inode: list_head for journal->committing_inodes. */
+ struct list_head ip_pending_locks;
+ struct list_head ip_j_inode;
+
+ /* protected by trans_inc_lock, which transaction were we
+ * created on? Zero if none. */
unsigned long ip_created_trans;
- /* last transaction we were a part of. */
- unsigned long ip_last_trans;
} ocfs_inode_private;
+/* Eventually, the 'flags' and 'open_flags' fields need to be
+ * merged. */
+/* open flags */
+#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00000001)
+#define OCFS_IN_FIRST_OPEN (0x00000002)
+
/* 'flags' flags. */
/* has this inode been deleted, either from this node or from another node. */
#define OCFS_INODE_DELETED 0x00000001
@@ -244,9 +411,6 @@
#define OCFS_INODE_SYSTEM_FILE 0x00000008
/* are we going to let another node deal with deletion of this inode? */
#define OCFS_INODE_SKIP_DELETE 0x00000010
-#define OCFS_INODE_IN_REFRESH 0x00000020
-#define OCFS_INODE_BITMAP 0x00000040
-#define OCFS_INODE_OPEN_DIRECT 0x00000080
#define OCFS_I(i) ((ocfs_inode_private *)(i->u.generic_ip))
@@ -260,6 +424,9 @@
#define SET_INODE_JOURNAL(i) (OCFS_I(i)->ip_flags |= OCFS_INODE_JOURNAL)
#define CLEAR_INODE_JOURNAL(i) (OCFS_I(i)->ip_flags &= (~OCFS_INODE_JOURNAL))
+
+/*
+ * Yield a pointer to the ip_lockres member of the inode's OCFS private
+ * data (OCFS_I(i)); calls BUG() when the inode pointer is NULL.
+ * The argument is captured once in a local so that it is evaluated exactly
+ * once and an arbitrary expression may be passed safely.
+ */
+#define GET_INODE_LOCKRES(i) ({ \
+	typeof(i) __lr_inode = (i); \
+	if (__lr_inode == NULL) \
+		BUG(); \
+	&(OCFS_I(__lr_inode)->ip_lockres); \
+})
+
typedef enum _ocfs_vol_state
{
VOLUME_INIT = 0,
@@ -269,12 +436,32 @@
}
ocfs_vol_state;
+typedef struct _ocfs_vol_node_map
+{
+ __u64 time;
+ __u32 miss_cnt;
+ atomic_t dismount;
+}
+ocfs_vol_node_map;
+
typedef struct _ocfs_commit_task
{
struct completion c_complete;
struct task_struct *c_task;
} ocfs_commit_task;
+typedef struct _ocfs_dlm_stats
+{
+ atomic_t total;
+ atomic_t okay;
+ atomic_t etimedout;
+ atomic_t efail;
+ atomic_t eagain;
+ atomic_t enoent;
+ atomic_t def;
+}
+ocfs_dlm_stats;
+
typedef struct _ocfs_alloc_stats
{
atomic_t moves;
@@ -288,7 +475,6 @@
} ocfs_alloc_stats;
struct _ocfs_journal;
-struct _ocfs2_slot_info;
/*
* ocfs_super
@@ -297,31 +483,37 @@
*/
struct _ocfs_super
{
+ struct semaphore osb_res; /* resource to protect the ocfs_super */
struct list_head osb_next; /* list of ocfs_super(s) */
- u32 osb_id; /* id used by the proc interface */
+ __u32 osb_id; /* id used by the proc interface */
+ struct completion dlm_complete;
+ struct task_struct *dlm_task;
ocfs_commit_task *commit;
+ __u32 osb_flags;
+ ocfs_node_map publ_map;
struct super_block *sb;
struct inode *root_inode;
struct inode *sys_root_inode;
struct inode *system_inodes[NUM_SYSTEM_INODES];
-
- struct _ocfs2_slot_info *slot_info;
-
- spinlock_t node_map_lock;
- ocfs_node_map mounted_map;
- ocfs_node_map recovery_map;
- ocfs_node_map umount_map;
-
+
/* new */
u32 num_clusters;
u64 root_blkno;
u64 system_dir_blkno;
u64 bitmap_blkno;
u32 bitmap_cpg;
+ u64 publish_blkno;
+ u32 publish_blocks;
+ u64 vote_blkno;
+ u32 vote_blocks;
+ u64 autoconfig_blkno;
+ u32 autoconfig_blocks;
+ u64 new_autoconfig_blkno;
+ u32 new_autoconfig_blocks;
+ u32 total_autoconfig_blocks;
u8 *uuid;
u8 *vol_label;
u64 first_cluster_group_blkno;
- u32 fs_generation;
u32 s_feature_compat;
u32 s_feature_incompat;
@@ -330,101 +522,398 @@
spinlock_t s_next_gen_lock;
u32 s_next_generation;
+ ocfs_vol_node_map *vol_node_map;
+ struct semaphore cfg_lock;
+ BARF_BARF_BARF **node_cfg_info;
+ __u64 cfg_seq_num;
+ int cfg_initialized;
u16 max_nodes;
+ u16 num_cfg_nodes;
u16 num_nodes;
s16 node_num;
- s16 slot_num;
int reclaim_id; /* reclaim the original node number*/
+ __u32 hbt;
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
+ int needs_flush;
struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */
atomic_t vol_state;
+ struct semaphore orphan_recovery_lock;
struct semaphore recovery_lock;
- int recovery_launched;
+ spinlock_t recovery_map_lock;
+ ocfs_node_map recovery_map;
int disable_recovery;
- wait_queue_head_t checkpoint_event;
- atomic_t needs_checkpoint;
+ atomic_t num_recovery_threads;
+ struct timer_list lock_timer;
+ atomic_t lock_stop;
+ wait_queue_head_t lock_event;
+ atomic_t lock_event_woken;
+ struct semaphore comm_lock; /* protects ocfs_comm_process_vote_reply */
+ atomic_t nm_init;
+ wait_queue_head_t nm_init_event;
+ __u32 prealloc_lock;
+ struct buffer_head **autoconfig_bhs;
+ struct semaphore publish_lock; /* protects r/w to publish sector */
+ atomic_t node_req_vote; /* set when node's vote req pending */
+ int publish_dirty;
+ wait_queue_head_t flush_event;
+ atomic_t flush_event_woken;
struct _ocfs_journal *journal;
atomic_t clean_buffer_seq;
spinlock_t clean_buffer_lock;
-
+ struct list_head *lock_recovery_lists;
+ __u64 *last_publ_seq_num;
int have_local_alloc;
struct buffer_head *local_alloc_bh;
-
- /* Next two fields are for local node slot recovery during
- * mount. */
- int dirty;
- ocfs2_dinode *local_alloc_copy;
-
+ __u8 check_mounted; /* tell nm to check mounted flag, protected by publish_lock*/
+ ocfs_dlm_stats net_reqst_stats; /* stats of netdlm vote requests */
+ ocfs_dlm_stats net_reply_stats; /* stats of netdlm vote responses */
ocfs_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
+ struct semaphore vote_sem; /* protects calls to ocfs_process_vote */
+ struct list_head vote_obj_queue;
+ spinlock_t vote_obj_queue_lock;
+ unsigned long voting_ino; /* only safe from the process_vote pid */
+ wait_queue_head_t open_event;
+};
- char *group_name;
- struct inode *group_inode;
- dlm_ctxt *dlm;
- ocfs2_lock_res super_lockres;
+typedef struct _ocfs_comm_info
+{
+ u16 ip_version; /* IP version in NBO */
+ u16 ip_port; /* IP port in NBO */
+ union {
+ __u32 ip_addr4; /* IPv4 address in NBO */
+ __u32 ip_addr6[4]; /* IPv6 address in NBO */
+ } addr_u;
+} ocfs_comm_info;
- wait_queue_head_t recovery_event;
-
- spinlock_t vote_task_lock;
- struct task_struct *vote_task;
- wait_queue_head_t vote_event;
- atomic_t wake_vote_task;
- int vote_exit;
-
- struct list_head blocked_lock_list;
- unsigned long blocked_lock_count;
-
- struct list_head vote_list;
- int vote_count;
-
- struct completion vote_event_complete;
- struct completion vote_event_init;
-
- u32 net_key;
- spinlock_t net_response_lock;
- unsigned int net_response_ids;
- struct list_head net_response_list;
-};
-
typedef struct _ocfs_global_ctxt
{
struct semaphore global_res;
struct list_head osb_next; /* List of all volumes */
kmem_cache_t *inode_cache;
kmem_cache_t *lock_cache;
- u32 flags;
+ __u32 flags;
+ __s16 pref_node_num; /* preferred... osb has the real one */
+ ocfs_guid guid; /* uniquely identifies a node */
char *node_name; /* human readable node identification */
+ char *cluster_name; /* unused */
+ ocfs_comm_info comm_info; /* ip address, etc for listener */
+ int comm_info_read; /* ipc info loaded from config file */
+ spinlock_t comm_seq_lock; /* protects comm_seq_num */
+ __u64 comm_seq_num; /* local node seq num used in ipcdlm */
+ ocfs_dlm_stats net_reqst_stats; /* stats of netdlm vote requests */
+ ocfs_dlm_stats net_reply_stats; /* stats of netdlm vote responses */
struct proc_dir_entry *proc_root_dir; /* points to /proc/fs/ocfs2 */
}
ocfs_global_ctxt;
+typedef struct _ocfs_ipc_ctxt
+{
+ __u32 dlm_msg_size;
+ __u16 version;
+ int init;
+ struct socket *send_sock;
+ struct socket *recv_sock;
+ struct completion complete;
+ struct task_struct *task;
+}
+ocfs_ipc_ctxt;
+
+
+extern ocfs_ipc_ctxt OcfsIpcCtxt;
+
+typedef struct _ocfs_ipc_dlm_config
+{
+ __u16 version;
+ __u32 msg_size;
+ __u32 num_recv_threads;
+}
+ocfs_ipc_dlm_config;
+
/*
** Globals ...
*/
extern ocfs_global_ctxt OcfsGlobalCtxt;
+
+/*
+ * DLM network stuff
+ */
+typedef struct _ocfs_dlm_msg_hdr
+{
+ __u64 lock_id;
+ __u64 lock_seq_num;
+ __u32 flags;
+ __u8 odmh_pad[4];
+ __u32 num_ident; /* number of identical messages, always >= 1 */
+} ocfs_dlm_msg_hdr;
+
+typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;
+
+typedef struct _ocfs_dlm_reply_master
+{
+ ocfs_dlm_msg_hdr h;
+ __u32 status;
+}
+ocfs_dlm_reply_master;
+
+typedef struct _ocfs_dlm_msg
+{
+ __u32 magic;
+ __u32 msg_len;
+ __u8 vol_id[MAX_VOL_ID_LENGTH];
+ __s16 src_node;
+ __s16 dst_node;
+ __u32 msg_type;
+ __u32 check_sum;
+ __u8 msg_buf[0];
+} ocfs_dlm_msg;
+
+typedef struct _ocfs_vote_obj
+{
+ struct list_head list;
+ wait_queue_head_t voted_event;
+ atomic_t voted_event_woken;
+ atomic_t refcount;
+ spinlock_t lock;
+ __u32 vote_state;
+ __u32 req_lock_type;
+ int vote_status;
+ ocfs_node_map req_vote_map;
+ ocfs_node_map got_vote_map;
+ //ocfs_node_map tmp_openmap;
+ __u64 seq_num;
+ pid_t pid;
+ ocfs_dlm_msg m;
+} ocfs_vote_obj;
+
+enum {
+ VOTE_OBJ_STATE_UNSENT,
+ VOTE_OBJ_STATE_SENT,
+ VOTE_OBJ_STATE_PARTIAL_REPLY,
+ VOTE_OBJ_STATE_FULL_REPLY,
+ VOTE_OBJ_STATE_DESTROYING
+};
+
+
+
+typedef struct _ocfs_vote_obj_lookup_data ocfs_vote_obj_lookup_data;
+
+struct _ocfs_vote_obj_lookup_data
+{
+ union {
+ struct {
+ __u64 seq_num;
+ __u64 lock_id;
+ } s;
+ struct {
+ char *page;
+ int *len;
+ int max;
+ } proc;
+ } u;
+ int (*func) (ocfs_vote_obj *obj, struct _ocfs_vote_obj_lookup_data *data);
+ ocfs_vote_obj **ret;
+};
+
+
+
+
+typedef struct _ocfs_recv_ctxt
+{
+ __s32 msg_len;
+ __u8 msg[OCFS_MAX_DLM_PKT_SIZE];
+ int status;
+ struct work_struct ipc_wq;
+}
+ocfs_recv_ctxt;
+
+typedef struct _ocfs_cfg_task
+{
+ struct work_struct cfg_wq;
+ ocfs_super *osb;
+ __u64 lock_off;
+ __u8 *buffer;
+ struct buffer_head *bh;
+}
+ocfs_cfg_task;
+
+typedef enum _ocfs_volcfg_op
+{
+ OCFS_VOLCFG_ADD,
+ OCFS_VOLCFG_UPD
+}
+ocfs_volcfg_op;
+
+typedef struct _ocfs_vote_request_ctxt
+{
+ __s16 node_num;
+ int status;
+ ocfs_dlm_msg *dlm_msg;
+} ocfs_vote_request_ctxt;
+
+typedef struct _ocfs_vote_reply_ctxt
+{
+ int reply_method;
+ int *status;
+ ocfs_node_map *got_vote_map;
+ __u32 flags;
+ ocfs_dlm_reply_master *reply;
+} ocfs_vote_reply_ctxt;
+
struct ocfs_ioc
{
char name[255]; /* "OCFS" */
char version[255]; /* version */
- u16 nodenum; /* node number */
+ __u16 nodenum; /* node number */
char nodename[255]; /* node name */
};
+/* timeout structure taken from Ben's aio.c */
+typedef struct _ocfs_timeout {
+ struct timer_list timer;
+ int timed_out;
+ wait_queue_head_t wait;
+} ocfs_timeout;
+
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+/*
+ * Sleep in TASK_INTERRUPTIBLE on wait queue 'wq' until 'condition' holds.
+ *
+ * 'ret' (an lvalue) receives:
+ *    0          - 'condition' evaluated true,
+ *   -ETIMEDOUT  - the local ocfs_timeout reported timed_out first,
+ *   -EINTR      - signal_pending(current) was true after schedule().
+ *
+ * ocfs_set_timeout() is only called when 'timeo' is non-zero.
+ * 'condition' is re-evaluated on every pass through the loop.
+ * All macro arguments are parenthesized at their point of use so that
+ * compound expressions can be passed for 'wq', 'timeo' and 'ret'.
+ */
+#define __ocfs_wait(wq, condition, timeo, ret) \
+do { \
+	ocfs_timeout __to; \
+	\
+	DECLARE_WAITQUEUE(__wait, current); \
+	DECLARE_WAITQUEUE(__to_wait, current); \
+	\
+	ocfs_init_timeout(&__to); \
+	\
+	if (timeo) { \
+		ocfs_set_timeout(&__to, (timeo)); \
+		if (__to.timed_out) { \
+			ocfs_clear_timeout(&__to); \
+		} \
+	} \
+	\
+	/* wake up on either the caller's queue or the timeout's queue */ \
+	add_wait_queue(&(wq), &__wait); \
+	add_wait_queue(&__to.wait, &__to_wait); \
+	do { \
+		(ret) = 0; \
+		set_current_state(TASK_INTERRUPTIBLE); \
+		if (condition) \
+			break; \
+		(ret) = -ETIMEDOUT; \
+		if (__to.timed_out) \
+			break; \
+		schedule(); \
+		if (signal_pending(current)) { \
+			(ret) = -EINTR; \
+			break; \
+		} \
+	} while (1); \
+	\
+	set_current_state(TASK_RUNNING); \
+	remove_wait_queue(&(wq), &__wait); \
+	remove_wait_queue(&__to.wait, &__to_wait); \
+	\
+	if (timeo) \
+		ocfs_clear_timeout(&__to); \
+	\
+} while(0)
+
+/*
+ * Sleep in TASK_UNINTERRUPTIBLE on wait queue 'wq' until 'condition' holds.
+ *
+ * 'ret' (an lvalue) receives:
+ *    0          - 'condition' evaluated true,
+ *   -ETIMEDOUT  - the local ocfs_timeout reported timed_out first.
+ *
+ * Unlike __ocfs_wait() there is no signal check in the loop.
+ * ocfs_set_timeout() is only called when 'timeo' is non-zero.
+ * All macro arguments are parenthesized at their point of use so that
+ * compound expressions can be passed for 'wq', 'timeo' and 'ret'.
+ */
+#define __ocfs_wait_uninterruptible(wq, condition, timeo, ret) \
+do { \
+	ocfs_timeout __to; \
+	\
+	DECLARE_WAITQUEUE(__wait, current); \
+	DECLARE_WAITQUEUE(__to_wait, current); \
+	\
+	ocfs_init_timeout(&__to); \
+	\
+	if (timeo) { \
+		ocfs_set_timeout(&__to, (timeo)); \
+		if (__to.timed_out) { \
+			ocfs_clear_timeout(&__to); \
+		} \
+	} \
+	\
+	/* wake up on either the caller's queue or the timeout's queue */ \
+	add_wait_queue(&(wq), &__wait); \
+	add_wait_queue(&__to.wait, &__to_wait); \
+	do { \
+		(ret) = 0; \
+		set_current_state(TASK_UNINTERRUPTIBLE); \
+		if (condition) \
+			break; \
+		(ret) = -ETIMEDOUT; \
+		if (__to.timed_out) \
+			break; \
+		schedule(); \
+	} while (1); \
+	\
+	set_current_state(TASK_RUNNING); \
+	remove_wait_queue(&(wq), &__wait); \
+	remove_wait_queue(&__to.wait, &__to_wait); \
+	\
+	if (timeo) \
+		ocfs_clear_timeout(&__to); \
+	\
+} while(0)
+
+/*
+ * Expression form of __ocfs_wait(): evaluates to 0 straight away when
+ * 'condition' is already true, otherwise to the status that __ocfs_wait()
+ * stores (0, -ETIMEDOUT or -EINTR).
+ */
+#define ocfs_wait(wq, condition, timeout) \
+({ \
+	int __wait_rc = 0; \
+	\
+	if (!(condition)) { \
+		__ocfs_wait(wq, condition, timeout, __wait_rc); \
+	} \
+	__wait_rc; \
+})
+
+/*
+ * Expression form of __ocfs_wait_uninterruptible(): evaluates to 0 straight
+ * away when 'condition' is already true, otherwise to the status that
+ * __ocfs_wait_uninterruptible() stores (0 or -ETIMEDOUT).
+ */
+#define ocfs_wait_uninterruptible(wq, condition, timeout) \
+({ \
+	int __wait_rc = 0; \
+	\
+	if (!(condition)) { \
+		__ocfs_wait_uninterruptible(wq, condition, timeout, __wait_rc); \
+	} \
+	__wait_rc; \
+})
+
/*
 * Derive an inode number from a block number by keeping only the bits of
 * 'blkno' that fit in an unsigned long.  'sb' is not used.
 */
static inline unsigned long ino_from_blkno(struct super_block *sb,
					   u64 blkno)
{
	const u64 ino_mask = (u64)ULONG_MAX;

	return (unsigned long)(blkno & ino_mask);
}
+/*
+ * Take an additional reference on 'inode', but only when i_count is
+ * already at least 1 and I_CLEAR is not set in i_state.
+ *
+ * Returns 0 after incrementing i_count.  Otherwise logs the inode's
+ * i_count, i_state, i_ino, block number and deleted flag, and returns
+ * -EINVAL without touching the reference count.
+ */
+static inline int ocfs_inc_icount(struct inode *inode)
+{
+	if (atomic_read(&inode->i_count) < 1 ||
+	    (inode->i_state & I_CLEAR)) {
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, no dentry\n",
+			       atomic_read(&inode->i_count), inode->i_state,
+			       inode->i_ino);
+
+		/* ip_blkno is a u64, which is 'unsigned long' on some 64-bit
+		 * architectures; cast so the argument really matches %llu. */
+		LOG_ERROR_ARGS("inode=%llu, deleted=%u\n",
+			       (unsigned long long)OCFS_I(inode)->ip_blkno,
+			       INODE_DELETED(inode));
+
+		return -EINVAL;
+	}
+
+	atomic_inc(&inode->i_count);
+	return 0;
+}
+
+/*
+ * Returns 1 when the inode's lock resource has lock_type OCFS_LKM_EXMODE
+ * and its master_node_num equals osb->node_num; returns 0 otherwise.
+ */
+static inline int ocfs_is_local_cache_lock(ocfs_super *osb, struct inode *inode)
+{
+	ocfs_lock_res *res = GET_INODE_LOCKRES(inode);
+
+	return (res->lock_type == OCFS_LKM_EXMODE &&
+		res->master_node_num == osb->node_num);
+}
+
typedef struct _ocfs_journal_handle ocfs_journal_handle;
#endif /* !OCFS_H */
Modified: branches/endian/src/ocfs1_fs_compat.h
===================================================================
--- branches/endian/src/ocfs1_fs_compat.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs1_fs_compat.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -33,7 +33,6 @@
#define MAX_VOL_ID_LENGTH_V1 16
#define MAX_VOL_LABEL_LEN_V1 64
#define MAX_CLUSTER_NAME_LEN_V1 64
-#define MAX_NODE_NAME_LENGTH 32
#define OCFS1_MAJOR_VERSION (2)
#define OCFS1_MINOR_VERSION (0)
Modified: branches/endian/src/ocfs2.h
===================================================================
--- branches/endian/src/ocfs2.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs2.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -26,8 +26,6 @@
#ifndef OCFS2_H
#define OCFS2_H
-#define OCFS2_MAX_NODE_NAME_LENGTH 65
-
static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb,
u32 clusters)
{
Modified: branches/endian/src/ocfs2_fs.h
===================================================================
--- branches/endian/src/ocfs2_fs.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs2_fs.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -93,19 +93,9 @@
#define OCFS2_LOCAL_ALLOC_FL (0x00000040) /* Node local alloc bitmap */
#define OCFS2_BITMAP_FL (0x00000080) /* Allocation bitmap */
#define OCFS2_JOURNAL_FL (0x00000100) /* Node journal */
-#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
+#define OCFS2_DLM_FL (0x00000200) /* DLM area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
-/*
- * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
- */
-#define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */
-
-/*
- * superblock s_state flags
- */
-#define OCFS2_ERROR_FS (0x00000001) /* FS saw errors */
-
/* Limit of space in ocfs2_dir_entry */
#define OCFS2_MAX_FILENAME_LENGTH 255
@@ -121,19 +111,12 @@
#define OCFS2_DEFAULT_JOURNAL_SIZE (8 * ONE_MEGA_BYTE)
#define OCFS2_MIN_JOURNAL_SIZE (4 * ONE_MEGA_BYTE)
-struct ocfs2_system_inode_info {
- char *si_name;
- int si_flags;
- int si_mode;
-};
-
/* System file index */
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
- SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
- HEARTBEAT_SYSTEM_INODE,
+ DLM_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE DLM_SYSTEM_INODE
GLOBAL_BITMAP_SYSTEM_INODE,
ORPHAN_DIR_SYSTEM_INODE,
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
@@ -144,23 +127,22 @@
NUM_SYSTEM_INODES
};
-static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
+static char *ocfs2_system_inode_names[NUM_SYSTEM_INODES] = {
/* Global system inodes (single copy) */
/* The first two are only used from userspace mkfs/tunefs */
- [BAD_BLOCK_SYSTEM_INODE] { "bad_blocks", 0, S_IFREG | 0644 },
- [GLOBAL_INODE_ALLOC_SYSTEM_INODE] { "global_inode_alloc", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
+ [BAD_BLOCK_SYSTEM_INODE] "bad_blocks",
+ [GLOBAL_INODE_ALLOC_SYSTEM_INODE] "global_inode_alloc",
/* These are used by the running filesystem */
- [SLOT_MAP_SYSTEM_INODE] { "slot_map", 0, S_IFREG | 0644 },
- [HEARTBEAT_SYSTEM_INODE] { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
- [GLOBAL_BITMAP_SYSTEM_INODE] { "global_bitmap", 0, S_IFREG | 0644 },
- [ORPHAN_DIR_SYSTEM_INODE] { "orphan_dir", 0, S_IFDIR | 0755 },
+ [DLM_SYSTEM_INODE] "dlm",
+ [GLOBAL_BITMAP_SYSTEM_INODE] "global_bitmap",
+ [ORPHAN_DIR_SYSTEM_INODE] "orphan_dir",
/* Node-specific system inodes (one copy per node) */
- [EXTENT_ALLOC_SYSTEM_INODE] { "extent_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
- [INODE_ALLOC_SYSTEM_INODE] { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
- [JOURNAL_SYSTEM_INODE] { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
- [LOCAL_ALLOC_SYSTEM_INODE] { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }
+ [EXTENT_ALLOC_SYSTEM_INODE] "extent_alloc:%04d",
+ [INODE_ALLOC_SYSTEM_INODE] "inode_alloc:%04d",
+ [JOURNAL_SYSTEM_INODE] "journal:%04d",
+ [LOCAL_ALLOC_SYSTEM_INODE] "local_alloc:%04d"
};
@@ -209,24 +191,38 @@
* Convenience casts
*/
#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
+#define DISK_LOCK(dinode) (&((dinode)->i_disk_lock))
#define LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
+/* TODO: change these? */
+#define OCFS2_NODE_CONFIG_HDR_SIGN "NODECFG"
+#define OCFS2_NODE_CONFIG_SIGN_LEN 8
+#define OCFS2_NODE_CONFIG_VER 2
+#define OCFS2_NODE_MIN_SUPPORTED_VER 2
+#define MAX_NODE_NAME_LENGTH 32
+
+#define OCFS2_GUID_HOSTID_LEN 20
+#define OCFS2_GUID_MACID_LEN 12
+#define OCFS2_GUID_LEN (OCFS2_GUID_HOSTID_LEN + OCFS2_GUID_MACID_LEN)
+
+
+
/*
* On disk extent record for OCFS2
* It describes a range of clusters on disk.
*/
typedef struct _ocfs2_extent_rec {
-/*00*/ __u32 e_cpos; /* Offset into the file, in clusters */
- __u32 e_clusters; /* Clusters covered by this extent */
- __u64 e_blkno; /* Physical disk offset, in blocks */
+/*00*/ __u32 bryce_e_cpos; /* Offset into the file, in clusters */
+ __u32 bryce_e_clusters; /* Clusters covered by this extent */
+ __u64 bryce_e_blkno; /* Physical disk offset, in blocks */
/*10*/
} ocfs2_extent_rec;
typedef struct _ocfs2_chain_rec {
- __u32 c_free; /* Number of free bits in this chain. */
- __u32 c_total; /* Number of total bits in this chain */
- __u64 c_blkno; /* Physical disk offset (blocks) of 1st group */
+ __u32 bryce_c_free; /* Number of free bits in this chain. */
+ __u32 bryce_c_total; /* Number of total bits in this chain */
+ __u64 bryce_c_blkno; /* Physical disk offset (blocks) of 1st group */
} ocfs2_chain_rec;
/*
@@ -236,15 +232,15 @@
* ocfs2_extent_block.h_list, respectively.
*/
typedef struct _ocfs2_extent_list {
-/*00*/ __u16 l_tree_depth; /* Extent tree depth from this
+/*00*/ __u16 bryce_l_tree_depth; /* Extent tree depth from this
point. 0 means data extents
hang directly off this
header (a leaf) */
- __u16 l_count; /* Number of extent records */
- __u16 l_next_free_rec; /* Next unused extent slot */
- __u16 l_reserved1;
- __u64 l_reserved2; /* Pad to
- sizeof(ocfs2_extent_rec) */
+ __u16 bryce_l_count; /* Number of extent records */
+ __u16 bryce_l_next_free_rec; /* Next unused extent slot */
+ __u16 bryce_l_reserved1; /* unused?? */
+ __u64 bryce_l_reserved2; /* unused?? */
+ /* Pad to sizeof(ocfs2_extent_rec) */
/*10*/ ocfs2_extent_rec l_recs[0]; /* Extent records */
} ocfs2_extent_list;
@@ -254,11 +250,11 @@
* ocfs2_dinode.id2.i_chain.
*/
typedef struct _ocfs2_chain_list {
-/*00*/ __u16 cl_cpg; /* Clusters per Block Group */
- __u16 cl_bpc; /* Bits per cluster */
- __u16 cl_count; /* Total chains in this list */
- __u16 cl_next_free_rec; /* Next unused chain slot */
- __u64 cl_reserved1;
+/*00*/ __u16 bryce_cl_cpg; /* Clusters per Block Group */
+ __u16 bryce_cl_bpc; /* Bits per cluster */
+ __u16 bryce_cl_count; /* Total chains in this list */
+ __u16 bryce_cl_next_free_rec; /* Next unused chain slot */
+ __u64 bryce_cl_reserved1; /* unused?? */
/*10*/ ocfs2_chain_rec cl_recs[0]; /* Chain records */
} ocfs2_chain_list;
@@ -268,15 +264,15 @@
typedef struct _ocfs2_extent_block
{
/*00*/ __u8 h_signature[8]; /* Signature for verification */
- __u64 h_reserved1;
-/*10*/ __s16 h_suballoc_node; /* Node suballocator this
+ __u64 bryce_h_reserved1; /* unused?? */
+/*10*/ __s16 bryce_h_suballoc_node; /* Node suballocator this
extent_header belongs to */
- __u16 h_suballoc_bit; /* Bit offset in suballocater
+ __u16 bryce_h_suballoc_bit; /* Bit offset in suballocater
block group */
- __u32 h_fs_generation; /* Must match super block */
- __u64 h_blkno; /* Offset on disk, in blocks */
-/*20*/ __u64 h_reserved3;
- __u64 h_next_leaf_blk; /* Offset on disk, in blocks,
+ __u32 bryce_h_reserved2; /* unused ?? */
+ __u64 bryce_h_blkno; /* Offset on disk, in blocks */
+/*20*/ __u64 bryce_h_reserved3; /* unused?? */
+ __u64 bryce_h_next_leaf_blk; /* Offset on disk, in blocks,
of next leaf header pointing
to data */
/*30*/ ocfs2_extent_list h_list; /* Extent record list */
@@ -284,34 +280,45 @@
} ocfs2_extent_block;
/*
+ * On disk lock structure for OCFS2
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/ __s16 bryce_dl_master; /* Node number of current master */
+ __u8 dl_level; /* Lock level */
+ __u8 dl_reserved1;
+/*04*/
+} ocfs2_disk_lock;
+
+/*
* On disk superblock for OCFS2
* Note that it is contained inside an ocfs2_dinode, so all offsets
* are relative to the start of ocfs2_dinode.id2.
*/
typedef struct _ocfs2_super_block {
-/*00*/ __u16 s_major_rev_level;
- __u16 s_minor_rev_level;
- __u16 s_mnt_count;
- __s16 s_max_mnt_count;
- __u16 s_state; /* File system state */
- __u16 s_errors; /* Behaviour when detecting errors */
- __u32 s_checkinterval; /* Max time between checks */
-/*10*/ __u64 s_lastcheck; /* Time of last check */
- __u32 s_creator_os; /* OS */
- __u32 s_feature_compat; /* Compatible feature set */
-/*20*/ __u32 s_feature_incompat; /* Incompatible feature set */
- __u32 s_feature_ro_compat; /* Readonly-compatible feature set */
- __u64 s_root_blkno; /* Offset, in blocks, of root directory
+/*00*/ __u16 bryce_s_major_rev_level;
+ __u16 bryce_s_minor_rev_level;
+ __u16 bryce_s_mnt_count; /* unused?? */
+ __s16 bryce_s_max_mnt_count; /* unused?? */
+ __u16 bryce_s_state; /* unused?? */ /* File system state */
+ __u16 bryce_s_errors; /* unused?? */ /* Behaviour when detecting errors */
+ __u32 bryce_s_checkinterval; /* unused?? */ /* Max time between checks */
+/*10*/ __u64 bryce_s_lastcheck; /* unused?? */ /* Time of last check */
+ __u32 bryce_s_creator_os; /* unused?? */ /* OS */
+ __u32 bryce_s_feature_compat; /* Compatible feature set */
+/*20*/ __u32 bryce_s_feature_incompat; /* Incompatible feature set */
+ __u32 bryce_s_feature_ro_compat; /* Readonly-compatible feature set */
+ __u64 bryce_s_root_blkno; /* Offset, in blocks, of root directory
dinode */
-/*30*/ __u64 s_system_dir_blkno; /* Offset, in blocks, of system
+/*30*/ __u64 bryce_s_system_dir_blkno; /* Offset, in blocks, of system
directory dinode */
- __u32 s_blocksize_bits; /* Blocksize for this fs */
- __u32 s_clustersize_bits; /* Clustersize for this fs */
-/*40*/ __u16 s_max_nodes; /* Max nodes in this cluster before
+ __u32 bryce_s_blocksize_bits; /* Blocksize for this fs */
+ __u32 bryce_s_clustersize_bits; /* Clustersize for this fs */
+/*40*/ __u16 bryce_s_max_nodes; /* Max nodes in this cluster before
tunefs required */
- __u16 s_reserved1;
- __u32 s_reserved2;
- __u64 s_first_cluster_group; /* Block offset of 1st cluster
+ __u16 bryce_s_reserved1; /* unused?? */
+ __u32 bryce_s_reserved2; /* unused?? */
+ __u64 bryce_s_first_cluster_group; /* Block offset of 1st cluster
* group header */
/*50*/ __u8 s_label[64]; /* Label for mounting, etc. */
/*90*/ __u8 s_uuid[16]; /* Was vol_id */
@@ -325,10 +332,10 @@
*/
typedef struct _ocfs2_local_alloc
{
-/*00*/ __u32 la_bm_off; /* Starting bit offset in main bitmap */
- __u16 la_size; /* Size of included bitmap, in bytes */
- __u16 la_reserved1;
- __u64 la_reserved2;
+/*00*/ __u32 bryce_la_bm_off; /* Starting bit offset in main bitmap */
+ __u16 bryce_la_size; /* Size of included bitmap, in bytes */
+ __u16 bryce_la_reserved1; /* unused?? */
+ __u64 bryce_la_reserved2; /* unused?? */
/*10*/ __u8 la_bitmap[0];
} ocfs2_local_alloc;
@@ -337,53 +344,45 @@
*/
typedef struct _ocfs2_dinode {
/*00*/ __u8 i_signature[8]; /* Signature for validation */
- __u32 i_generation; /* Generation number */
- __s16 i_suballoc_node; /* Node suballocater this inode
+ __u32 bryce_i_generation; /* Generation number */
+ __s16 bryce_i_suballoc_node; /* Node suballocater this inode
belongs to */
- __u16 i_suballoc_bit; /* Bit offset in suballocater
+ __u16 bryce_i_suballoc_bit; /* Bit offset in suballocater
block group */
-/*10*/ __u32 i_reserved0;
- __u32 i_clusters; /* Cluster count */
- __u32 i_uid; /* Owner UID */
- __u32 i_gid; /* Owning GID */
-/*20*/ __u64 i_size; /* Size in bytes */
- __u16 i_mode; /* File mode */
- __u16 i_links_count; /* Links count */
- __u32 i_flags; /* File flags */
-/*30*/ __u64 i_atime; /* Access time */
- __u64 i_ctime; /* Creation time */
-/*40*/ __u64 i_mtime; /* Modification time */
- __u64 i_dtime; /* Deletion time */
-/*50*/ __u64 i_blkno; /* Offset on disk, in blocks */
- __u64 i_last_eb_blk; /* Pointer to last extent
+/*10*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
+/*14*/ __u32 bryce_i_clusters; /* Cluster count */
+/*18*/ __u32 bryce_i_uid; /* Owner UID */
+ __u32 bryce_i_gid; /* Owning GID */
+/*20*/ __u64 bryce_i_size; /* Size in bytes */
+ __u16 bryce_i_mode; /* File mode */
+ __u16 bryce_i_links_count; /* Links count */
+ __u32 bryce_i_flags; /* File flags */
+/*30*/ __u64 bryce_i_atime; /* Access time */
+ __u64 bryce_i_ctime; /* Creation time */
+/*40*/ __u64 bryce_i_mtime; /* Modification time */
+ __u64 bryce_i_dtime; /* Deletion time */
+/*50*/ __u64 bryce_i_blkno; /* Offset on disk, in blocks */
+ __u64 bryce_i_last_eb_blk; /* Pointer to last extent
block */
-/*60*/ __u32 i_fs_generation; /* Generation per fs-instance */
- __u32 i_reserved1; /* Generation per fs-instance */
-/*68*/ __u64 i_reserved2[10];
+/*60*/ __u64 bryce_i_reserved1[11]; /* unused?? */
/*B8*/ union {
- __u64 i_pad1; /* Generic way to refer to this
+ __u64 bryce_i_pad1; /* unused?? */ /* Generic way to refer to this
64bit union */
struct {
- __u64 i_rdev; /* Device number */
+ __u64 bryce_i_rdev; /* Device number */
} dev1;
struct { /* Info for bitmap system
inodes */
- __u32 i_used; /* Bits (ie, clusters) used */
- __u32 i_total; /* Total bits (clusters)
+ __u32 bryce_i_used; /* Bits (ie, clusters) used */
+ __u32 bryce_i_total; /* Total bits (clusters)
available */
} bitmap1;
- struct { /* Info for journal system
- inodes */
- __u32 ij_flags; /* Mounted, version, etc. */
- __u32 ij_pad;
- } journal1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
ocfs2_super_block i_super;
ocfs2_local_alloc i_lab;
ocfs2_chain_list i_chain;
ocfs2_extent_list i_list;
- __u8 i_symlink[0];
} id2;
/* Actual on-disk size is one block */
} ocfs2_dinode;
@@ -392,8 +391,8 @@
* On-disk directory entry structure for OCFS2
*/
struct ocfs2_dir_entry {
-/*00*/ __u64 inode; /* Inode number */
- __u16 rec_len; /* Directory entry length */
+/*00*/ __u64 bryce_inode; /* Inode number */
+ __u16 bryce_rec_len; /* Directory entry length */
__u8 name_len; /* Name length */
__u8 file_type;
/*0C*/ char name[OCFS2_MAX_FILENAME_LENGTH]; /* File name */
@@ -406,30 +405,24 @@
typedef struct _ocfs2_group_desc
{
/*00*/ __u8 bg_signature[8]; /* Signature for validation */
- __u16 bg_size; /* Size of included bitmap in
+ __u16 bryce_bg_size; /* Size of included bitmap in
bytes. */
- __u16 bg_bits; /* Bits represented by this
+ __u16 bryce_bg_bits; /* Bits represented by this
group. */
- __u16 bg_free_bits_count; /* Free bits count */
- __u16 bg_chain; /* What chain I am in. */
-/*10*/ __u32 bg_generation;
- __u32 bg_reserved1;
- __u64 bg_next_group; /* Next group in my list, in
+ __u16 bryce_bg_free_bits_count; /* Free bits count */
+ __u16 bryce_bg_chain; /* What chain I am in. */
+/*10*/ __u32 bryce_bg_generation;
+ __u32 bryce_bg_reserved1; /* unused?? */
+ __u64 bryce_bg_next_group; /* Next group in my list, in
blocks */
-/*20*/ __u64 bg_parent_dinode; /* dinode which owns me, in
+/*20*/ __u64 bryce_bg_parent_dinode; /* dinode which owns me, in
blocks */
- __u64 bg_blkno; /* Offset on disk, in blocks */
-/*30*/ __u64 bg_reserved2[2];
+ __u64 bryce_bg_blkno; /* Offset on disk, in blocks */
+/*30*/ __u64 bryce_bg_reserved2[2]; /* unused?? */
/*40*/ __u8 bg_bitmap[0];
} ocfs2_group_desc;
#ifdef __KERNEL__
-static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
-{
- return sb->s_blocksize -
- offsetof(struct _ocfs2_dinode, id2.i_symlink);
-}
-
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{
int size;
@@ -480,11 +473,6 @@
return size;
}
#else
-static inline int ocfs2_fast_symlink_chars(int blocksize)
-{
- return blocksize - offsetof(struct _ocfs2_dinode, id2.i_symlink);
-}
-
static inline int ocfs2_extent_recs_per_inode(int blocksize)
{
int size;
@@ -555,11 +543,10 @@
*/
if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE)
chars = snprintf(buf, len,
- ocfs2_system_inodes[type].si_name);
+ ocfs2_system_inode_names[type]);
else
chars = snprintf(buf, len,
- ocfs2_system_inodes[type].si_name,
- node);
+ ocfs2_system_inode_names[type], node);
return chars;
}
Modified: branches/endian/src/ocfs_compat.h
===================================================================
--- branches/endian/src/ocfs_compat.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs_compat.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -62,9 +62,6 @@
#define rb_root rb_root_s
#define rb_node rb_node_s
-struct rb_node *rb_first(struct rb_root *root);
-struct rb_node *rb_next(struct rb_node *node);
-struct rb_node *rb_prev(struct rb_node *node);
typedef long sector_t;
@@ -119,7 +116,7 @@
ret = 1;
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
- return ret;
+ return(ret);
}
#else
static inline int ocfs_task_interruptible(void)
@@ -134,7 +131,7 @@
ret = 1;
spin_unlock_irqrestore(&(current->sigmask_lock), flags);
}
- return ret;
+ return(ret);
}
static inline void dequeue_signal_lock(struct task_struct *task,
sigset_t *blocked, siginfo_t *info)
@@ -158,16 +155,6 @@
#define kstatfs statfs
-static inline loff_t i_size_read(struct inode *inode)
-{
- return inode->i_size;
-}
-
-static inline void i_size_write(struct inode *inode, loff_t i_size)
-{
- inode->i_size = i_size;
-}
-
#else /* LINUX_VERSION_CODE < 2.6 */
#include <linux/sched.h>
#include <linux/signal.h>
@@ -188,7 +175,7 @@
ret = 1;
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
- return ret;
+ return(ret);
}
#endif /* LINUX_VERSION_CODE < 2.6 */
@@ -203,8 +190,5 @@
(type *)( (char *)__mptr - offsetof(type,member) );})
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
-struct rb_node *rb_last(struct rb_root *root);
-#endif
-
#endif /* OCFS_COMPAT_H */
+
Modified: branches/endian/src/ocfs_journal.h
===================================================================
--- branches/endian/src/ocfs_journal.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs_journal.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -30,7 +30,7 @@
#include <linux/jbd.h>
#define OCFS_JOURNAL_CURRENT_VERSION 1
-#define OCFS_CHECKPOINT_INTERVAL (8 * HZ)
+#define OCFS_CHECKPOINT_INTERVAL 8000
enum ocfs_journal_state {
OCFS_JOURNAL_FREE = 0,
@@ -49,7 +49,7 @@
journal_t *k_journal; /* The kernels journal type */
struct inode *k_inode; /* Kernel inode pointing to
* this journal */
- u8 version; /* Journal version */
+ __u8 version; /* Journal version */
struct _ocfs_super *osb; /* pointer to the super
* block for the node
* we're currently
@@ -59,6 +59,7 @@
* which we usually run
* from (recovery,
* etc) */
+ __u32 node_num; /* Whose journal are we? */
struct buffer_head *lockbh; /* Journal disk lock, used
to access file entry */
atomic_t num_trans; /* Number of transactions
@@ -66,52 +67,58 @@
unsigned long trans_id;
/* locking order: trans_lock -> cmt_lock */
spinlock_t cmt_lock; /* protects the committed list */
+ atomic_t num_cmt_locks; /* number of delayed
+ * locks */
+ atomic_t num_chkpt_locks;
struct rw_semaphore trans_barrier;
- wait_queue_head_t j_checkpointed;
+
+ struct list_head committing_inodes; /* list of all
+ * inodes that
+ * have committed
+ * and are
+ * awaiting a
+ * checkpoint. Protected
+ * by cmt_lock. */
+ struct list_head checkpointing_locks; /* locks
+ * pending release
+ * after a checkpoint
+ * -- this variable
+ * is unlocked as
+ * commit_thread is
+ * the only guy who
+ * looks at it! */
};
extern spinlock_t trans_inc_lock;
/* wrap trans_id so we never have it equal to zero. */
-static inline unsigned long ocfs_inc_trans_id(ocfs_journal *j)
+static inline void ocfs_inc_trans_id(ocfs_journal *j)
{
- unsigned long old_id;
spin_lock(&trans_inc_lock);
- old_id = j->trans_id++;
+ j->trans_id++;
if (!j->trans_id)
j->trans_id = 1;
spin_unlock(&trans_inc_lock);
- return old_id;
}
-static inline void ocfs_set_inode_lock_trans(ocfs_journal *journal,
- struct inode *inode)
+static inline int ocfs_trans_checkpointed(ocfs_journal *j,
+ unsigned long trans_id)
{
- spin_lock(&trans_inc_lock);
- OCFS_I(inode)->ip_last_trans = journal->trans_id;
- spin_unlock(&trans_inc_lock);
-}
-
-/* Used to figure out whether it's safe to drop a metadata lock on an
- * inode. Returns true if all the inodes changes have been
- * checkpointed to disk. You should be holding the spinlock on the
- * metadata lock while calling this to be sure that nobody can take
- * the lock and put it on another transaction. */
-static inline int ocfs_inode_fully_checkpointed(struct inode *inode)
-{
int ret;
- ocfs_journal *journal = OCFS2_SB(inode->i_sb)->journal;
-
spin_lock(&trans_inc_lock);
- ret = time_after(journal->trans_id, OCFS_I(inode)->ip_last_trans);
+ ret = time_after(trans_id, j->trans_id);
spin_unlock(&trans_inc_lock);
return ret;
}
-/* convenience function to check if an inode is still new (has never
- * hit disk) Will do you a favor and set created_trans = 0 when you've
- * been checkpointed. returns '1' if the inode is still new. */
-static inline int ocfs_inode_is_new(struct inode *inode)
+/* convenience function to check if an inode has been checkpointed
+ * yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
+ * set created_trans = 0 when you've been checkpointed.
+ * returns '1' if the inode hasn't been checkpointed yet.
+ *
+ */
+static inline int ocfs_inode_is_new(ocfs_super *osb,
+ struct inode *inode)
{
int ret;
@@ -119,10 +126,10 @@
* mkfs. This helps us early during mount, before we have the
* journal open and trans_id could be junk. */
if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
- return 0;
+ return(0);
spin_lock(&trans_inc_lock);
- ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->trans_id,
- OCFS_I(inode)->ip_created_trans));
+ ret = !(time_after(osb->journal->trans_id,
+ OCFS_I(inode)->ip_created_trans));
if (!ret)
OCFS_I(inode)->ip_created_trans = 0;
spin_unlock(&trans_inc_lock);
@@ -139,15 +146,29 @@
typedef struct _ocfs_journal_lock ocfs_journal_lock;
struct _ocfs_journal_lock {
- struct inode *jl_inode;
- struct list_head jl_lock_list;
+ /* release_lock arguments. */
+ __u32 type;
+ __u32 flags;
+ struct inode *inode;
+ unsigned int num_ident;
+
+ /* used by commit_cache */
+ unsigned int drop_holders;
+ /* lock_list: we are either on
+ * - handle->locks: if still running
+ * - inode->ip_pending_locks: if waiting for checkpoint
+ * - journal->checkpointing_locks: awaiting release after checkpoint
+ */
+ struct list_head lock_list;
};
struct _ocfs_journal_handle {
handle_t *k_handle; /* kernel handle. */
ocfs_journal *journal;
struct _ocfs_super *osb; /* what super block we belong to */
- u32 flags; /* see flags below. */
+ __u32 flags; /* see flags below. */
+ struct list_head h_list; /* points to whatever list we're
+ * on. */
int max_buffs; /* Buffs reserved by this handle */
/* The following two fields are for ocfs_handle_add_lock */
@@ -177,7 +198,7 @@
/*
* Journal Control:
- * Initialize, Load, Shutdown, Wipe a journal.
+ * Initialize, Load, Shutdown, Wipe, Create a journal.
*
* ocfs_journal_init - Initialize journal structures in the OSB.
* ocfs_journal_load - Load the given journal off disk. Replay it if
@@ -187,39 +208,13 @@
* ocfs_journal_wipe - Wipe transactions from a journal. Optionally
* zero out each block.
* ocfs_recovery_thread - Perform recovery on a node. osb is our own osb.
- * ocfs2_mark_dead_nodes - Start recovery on nodes we won't get a heartbeat
- * event on.
- * ocfs_start_checkpoint - Kick the commit thread to do a checkpoint.
*/
-int ocfs_journal_init(struct _ocfs_super *osb, int *dirty);
+int ocfs_journal_init(struct _ocfs_super *osb);
void ocfs_journal_shutdown(struct _ocfs_super *osb);
int ocfs_journal_wipe(ocfs_journal *journal, int full);
int ocfs_journal_load(ocfs_journal *journal);
void ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
-int ocfs2_mark_dead_nodes(ocfs_super *osb);
-static inline void ocfs_start_checkpoint(struct _ocfs_super *osb)
-{
- atomic_set(&osb->needs_checkpoint, 1);
- wake_up(&osb->checkpoint_event);
-}
-static inline void ocfs2_checkpoint_inode(struct inode *inode)
-{
- ocfs_super *osb = OCFS_SB(inode->i_sb);
- if (!ocfs_inode_fully_checkpointed(inode)) {
- /* WARNING: This only kicks off a single
- * checkpoint. If someone races you and adds more
- * metadata to the journal, you won't know, and will
- * wind up waiting *alot* longer than necessary. Right
- * now we only use this in clear_inode so that's
- * OK. */
- ocfs_start_checkpoint(osb);
-
- wait_event(osb->journal->j_checkpointed,
- ocfs_inode_fully_checkpointed(inode));
- }
-}
-
/*
* Transaction Handling:
* Manage the lifetime of a transaction handle.
@@ -297,8 +292,10 @@
*/
int ocfs_journal_dirty(ocfs_journal_handle *handle,
struct buffer_head *bh);
-int ocfs_handle_add_lock(ocfs_journal_handle *handle,
- struct inode *inode);
+void ocfs_handle_add_lock(ocfs_journal_handle *handle,
+ __u32 type,
+ __u32 flags,
+ struct inode *inode);
/*
* Use this to protect from other processes reading buffer state while
* it's in flight.
@@ -382,9 +379,9 @@
* prev. last_eb_blk + blocks along edge of tree.
* calc_symlink_credits passes because we just need 1
* credit for the dinode there. */
- dinode_blocks = 1 + 1 + fe->id2.i_list.l_tree_depth;
+ dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth);
- return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks;
+ return (bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks);
}
static inline int ocfs_calc_symlink_credits(struct super_block *sb)
@@ -395,7 +392,7 @@
* within our single allocated extent. */
blocks += ocfs2_clusters_to_blocks(sb, 1);
- return blocks;
+ return(blocks);
}
static inline int ocfs_calc_group_alloc_credits(struct super_block *sb,
@@ -406,7 +403,7 @@
/* parent inode update + new block group header + bitmap inode update
+ bitmap blocks affected */
blocks = 1 + 1 + 1 + bitmap_blocks;
- return blocks;
+ return(blocks);
}
static inline int ocfs_calc_tree_trunc_credits(struct super_block *sb,
@@ -415,25 +412,25 @@
ocfs2_extent_list *last_el)
{
/* for file entry + all headers in this pass + update to next leaf */
- int credits = 1 + fe->id2.i_list.l_tree_depth + 1;
+ int credits = 1 + le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth) + 1;
int bitmap_blocks, i;
- i = last_el->l_next_free_rec - 1;
+ i = le16_to_cpu(last_el->bryce_l_next_free_rec) - 1;
OCFS_ASSERT(i >= 0);
/* We may be deleting metadata blocks, so metadata alloc dinode +
one desc. block for each possible delete. */
- if (fe->id2.i_list.l_tree_depth
- && (last_el->l_next_free_rec == 1)
- && ((last_el->l_recs[i].e_clusters - clusters_to_del) == 0))
- credits += 1 + fe->id2.i_list.l_tree_depth;
+ if (le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth)
+ && (le16_to_cpu(last_el->bryce_l_next_free_rec) == 1)
+ && ((le32_to_cpu(last_el->l_recs[i].bryce_e_clusters) - clusters_to_del) == 0))
+ credits += 1 + le16_to_cpu(fe->id2.i_list.bryce_l_tree_depth);
/* bitmap fe + group descriptor */
bitmap_blocks = OCFS_SUBALLOC_FREE;
credits += bitmap_blocks;
- return credits;
+ return(credits);
}
#endif /* _OCFSJOURNAL_H_ */
Modified: branches/endian/src/ocfs_log.h
===================================================================
--- branches/endian/src/ocfs_log.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/ocfs_log.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -26,16 +26,16 @@
#ifndef OCFS_LOG_H
#define OCFS_LOG_H
-extern u32 debug_context;
-extern u32 debug_level;
-extern u32 debug_exclude;
+extern __u32 debug_context;
+extern __u32 debug_level;
+extern __u32 debug_exclude;
#ifndef TRACE
#undef EVIL_TRACE
#endif
#ifdef EVIL_TRACE
-extern u64 debug_mask;
+extern __u64 debug_mask;
enum {
OCFS_TRACE_CREATE=1,
@@ -100,36 +100,36 @@
#define OCFS_DEBUG_CONTEXT_ALLOC 0x00000001 /* alloc.c */
#define OCFS_DEBUG_CONTEXT_DIR 0x00000002 /* dir.c */
#define OCFS_DEBUG_CONTEXT_EXTMAP 0x00000004 /* extmap.c */
-#define OCFS_DEBUG_CONTEXT_SLOTMAP 0x00000008 /* slotmap.c */
+#define OCFS_DEBUG_CONTEXT_HEARTBEAT 0x00000008 /* heartbeat.c */
#define OCFS_DEBUG_CONTEXT_IOCTL 0x00000010 /* ioctl.c */
-#define OCFS_DEBUG_CONTEXT_VOTE 0x00000020 /* vote.c */
+#define OCFS_DEBUG_CONTEXT_NM 0x00000020 /* nm.c */
#define OCFS_DEBUG_CONTEXT_PROC 0x00000040 /* proc.c */
#define OCFS_DEBUG_CONTEXT_SYMLINK 0x00000080 /* symlink.c */
#define OCFS_DEBUG_CONTEXT_BITMAP 0x00000100 /* bitmap.c */
#define OCFS_DEBUG_CONTEXT_FILE 0x00000200 /* file.c */
#define OCFS_DEBUG_CONTEXT_INODE 0x00000400 /* inode.c */
#define OCFS_DEBUG_CONTEXT_JOURNAL 0x00000800 /* journal.c */
-#define OCFS_DEBUG_CONTEXT_CHAINALLOC 0x00001000 /* chainalloc */
-#define OCFS_DEBUG_CONTEXT_LOCALALLOC 0x00002000 /* localalloc */
+#define OCFS_DEBUG_CONTEXT_CHAINALLOC 0x00001000 /* */
+#define OCFS_DEBUG_CONTEXT_LOCALALLOC 0x00002000 /* */
#define OCFS_DEBUG_CONTEXT_SYSFILE 0x00004000 /* sysfile.c */
#define OCFS_DEBUG_CONTEXT_VOLCFG 0x00008000 /* volcfg.c */
#define OCFS_DEBUG_CONTEXT_DCACHE 0x00010000 /* dcache.c */
-#define OCFS_DEBUG_CONTEXT_DLMGLUE 0x00020000 /* dlmglue.c */
-#define OCFS_DEBUG_CONTEXT_COMPAT 0x00040000 /* */
+#define OCFS_DEBUG_CONTEXT_DLM 0x00020000 /* dlm.c */
+#define OCFS_DEBUG_CONTEXT_HASH 0x00040000 /* hash.c */
#define OCFS_DEBUG_CONTEXT_IO 0x00080000 /* io.c */
#define OCFS_DEBUG_CONTEXT_NAMEI 0x00100000 /* namei.c */
#define OCFS_DEBUG_CONTEXT_OSB 0x00200000 /* osb.c */
#define OCFS_DEBUG_CONTEXT_SUPER 0x00400000 /* super.c */
#define OCFS_DEBUG_CONTEXT_UTIL 0x00800000 /* util.c */
-#define OCFS_DEBUG_CONTEXT_MMAP 0x01000000 /* mmap.c */
-#define OCFS_DEBUG_CONTEXT_UNUSED4 0x02000000 /* */
+#define OCFS_DEBUG_CONTEXT_VOTE 0x01000000 /* vote.c */
+#define OCFS_DEBUG_CONTEXT_LOCKRES 0x02000000 /* lockres.c */
#ifdef OCFS_DBG_TIMING
typedef union _my_timing_t
{
- u64 q;
- u32 lohi[2];
+ __u64 q;
+ __u32 lohi[2];
} my_timing_t;
#define IO_FUNC_TIMING_DECL my_timing_t begin, end; rdtsc (begin.lohi[0], begin.lohi[1]);
@@ -154,7 +154,7 @@
#define PRINT_STRING(__t) printk("\n");
#define PRINT_ENTRY(__t) printk("(%d) ENTRY: %s", current->pid, __FUNCTION__)
#else
-#define DECL_U8_ARRAY(__t, __s) u8 (__t)[(__s)]
+#define DECL_U8_ARRAY(__t, __s) __u8 (__t)[(__s)]
#define INIT_U8_ARRAY(__s) *(__s) = '\0'
#define PRINT_STRING(__t) printk("%s\n", (__t))
#define PRINT_ENTRY(__t) printk("(%d) %sENTRY: %s", current->pid, (__t), __FUNCTION__)
@@ -193,16 +193,16 @@
# define ENTRY_TIMING_DECLS
# define GET_TIMING(s, hi, lo)
#else
-# define ENTRY_TIMING_DECLS u32 _HI = 0, _LO = 0
+# define ENTRY_TIMING_DECLS __u32 _HI = 0, _LO = 0
# define GET_TIMING(s, hi, lo) \
do { \
IF_LEVEL_CONTEXT(OCFS_DEBUG_LEVEL_TIMING) { \
- u32 _lo, _hi; \
+ __u32 _lo, _hi; \
rdtsc (_lo, _hi); \
if ((s) == NULL) { \
(hi) = _hi; (lo) = _lo; \
} else { \
- u64 _b, _e; \
+ __u64 _b, _e; \
_b = hi; _b <<= 32; _b |= lo; \
_e = _hi; _e <<= 32; _e |= _lo; \
_e -= _b; \
Modified: branches/endian/src/proc.c
===================================================================
--- branches/endian/src/proc.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/proc.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -38,8 +38,8 @@
#include "ocfs2.h"
#include "proc.h"
+#include "vote.h"
#include "alloc.h"
-#include "heartbeat.h"
#include "ocfs_journal.h"
@@ -48,15 +48,18 @@
#define OCFS2_PROC_BASENAME "fs/ocfs2"
+static int ocfs_proc_globalctxt(char *page, char **start, off_t off, int count, int *eof, void *data);
+static int ocfs_proc_dlm_stats(char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_version (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_nodenum (char *page, char **start, off_t off, int count, int *eof, void *data);
-static int ocfs_proc_slotnum (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_nodename (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_mountpoint (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_statistics (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_device (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_nodes (char *page, char **start, off_t off, int count, int *eof, void *data);
+static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_alloc_stat(char *page, char **start, off_t off, int count, int *eof, void *data);
+static int ocfs_proc_guid (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_label (char *page, char **start, off_t off, int count, int *eof, void *data);
typedef struct _ocfs_proc_list
@@ -69,16 +72,20 @@
ocfs_proc_list top_dir[] = {
{ "version", NULL, ocfs_proc_version },
{ "nodename", NULL, ocfs_proc_nodename },
+ { "globalctxt", NULL, ocfs_proc_globalctxt },
+ { "lockstat", NULL, ocfs_proc_dlm_stats },
{ NULL } };
ocfs_proc_list sub_dir[] = {
{ "nodenum", NULL, ocfs_proc_nodenum },
{ "mountpoint", NULL, ocfs_proc_mountpoint },
- { "slotnum", NULL, ocfs_proc_slotnum },
{ "statistics", NULL, ocfs_proc_statistics },
+ { "lockstat", NULL, ocfs_proc_dlm_stats },
{ "device", NULL, ocfs_proc_device },
{ "nodes", NULL, ocfs_proc_nodes },
+ { "sent-votes", NULL, ocfs_proc_net_vote_obj },
{ "allocstat", NULL, ocfs_proc_alloc_stat },
+ { "guid", NULL, ocfs_proc_guid },
{ "label", NULL, ocfs_proc_label },
{ NULL } };
@@ -123,6 +130,7 @@
}
LOG_EXIT ();
+ return ;
} /* ocfs_proc_deinit */
/*
@@ -151,7 +159,83 @@
return len;
} /* ocfs_proc_calc_metrics */
+
/*
+ * ocfs_proc_globalctxt() - proc read handler registered as "globalctxt" in top_dir[].
+ * Writes OcfsGlobalCtxt's ip addr/port and guid into page; data is not used.
+ * Returns whatever ocfs_proc_calc_metrics() returns for the bytes written. */
+static int ocfs_proc_globalctxt(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = 0;
+ int ret;
+
+ LOG_ENTRY ();
+
+ len += sprintf(page + len, "ip addr/port : 0x%08u/%u\n",
+ ntohl(OcfsGlobalCtxt.comm_info.addr_u.ip_addr4),
+ ntohs(OcfsGlobalCtxt.comm_info.ip_port)); /* NOTE(review): %08u prints decimal after the "0x" prefix */
+ len += sprintf(page + len, "guid : ");
+ strncat(page + len, OcfsGlobalCtxt.guid.guid, OCFS2_GUID_LEN); /* page + len is the NUL left by sprintf */
+ len += OCFS2_GUID_LEN; /* NOTE(review): assumes guid holds exactly OCFS2_GUID_LEN characters -- confirm */
+ strncat(page + len, "\n", 1);
+ len++;
+
+ ret = ocfs_proc_calc_metrics(page, start, off, count, eof, len);
+
+ LOG_EXIT_INT (ret);
+ return ret;
+} /* ocfs_proc_globalctxt */
+
+/*
+ * ocfs_proc_dlm_stats() - proc read handler registered as "lockstat".
+ * Prints a header row plus one row each of vote request and vote reply counters into page.
+ * Returns whatever ocfs_proc_calc_metrics() returns for the bytes written. */
+static int ocfs_proc_dlm_stats(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+ int ret;
+ ocfs_super *osb;
+ ocfs_dlm_stats *nrq;
+ ocfs_dlm_stats *nrp;
+
+ LOG_ENTRY ();
+
+#define DLM_STATS_HDR "%-25s %10s %10s %10s %10s %10s %10s %10s\n"
+#define DLM_STATS "%-25s %10d %10d %10d %10d %10d %10d %10d\n"
+
+ osb = data; /* a NULL data pointer selects the OcfsGlobalCtxt counters below */
+ if (osb) {
+ nrq = &(osb->net_reqst_stats);
+ nrp = &(osb->net_reply_stats);
+ } else {
+ nrq = &(OcfsGlobalCtxt.net_reqst_stats);
+ nrp = &(OcfsGlobalCtxt.net_reply_stats);
+ }
+
+ len = sprintf (page, DLM_STATS_HDR, " ", "total", "okay", "etimedout",
+ "eagain", "efail", "enoent", "others");
+
+ len += sprintf (page + len, DLM_STATS, "netdlm vote requests",
+ atomic_read (&nrq->total), atomic_read (&nrq->okay),
+ atomic_read (&nrq->etimedout), atomic_read (&nrq->eagain),
+ atomic_read (&nrq->efail), atomic_read (&nrq->enoent),
+ atomic_read (&nrq->def)); /* def is printed under the "others" column */
+
+ len += sprintf (page + len, DLM_STATS, "netdlm vote responses",
+ atomic_read (&nrp->total), atomic_read (&nrp->okay),
+ atomic_read (&nrp->etimedout), atomic_read (&nrp->eagain),
+ atomic_read (&nrp->efail), atomic_read (&nrp->enoent),
+ atomic_read (&nrp->def)); /* def is printed under the "others" column */
+
+ ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+ LOG_EXIT_INT (ret);
+ return ret;
+} /* ocfs_proc_dlm_stats */
+
+/*
* ocfs_proc_alloc_stat()
*
*/
@@ -196,7 +280,7 @@
LOG_EXIT_INT (ret);
- return ret;
+ return(ret);
}
/*
@@ -247,29 +331,6 @@
} /* ocfs_proc_nodenum */
/*
- * ocfs_proc_slotnum()
- *
- */
-static int ocfs_proc_slotnum (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
- int ret;
- ocfs_super *osb;
-
- LOG_ENTRY ();
-
- osb = data;
- sprintf (page, "%d\n", osb->slot_num);
- len = strlen (page);
-
- ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
-
- LOG_EXIT_INT (ret);
- return ret;
-} /* ocfs_proc_slotnum */
-
-/*
* ocfs_proc_nodename()
*
*/
@@ -339,6 +400,7 @@
}
LOG_EXIT ();
+ return;
} /* ocfs_proc_remove_volume */
/*
@@ -369,25 +431,50 @@
int count, int *eof, void *data)
{
int len;
+ char *pubmap = NULL;
ocfs_super *osb;
- int ret = 0;
+ int ret = 0, i;
+ char *ptr;
LOG_ENTRY ();
osb = data;
+ pubmap = ocfs_malloc (100);
+ if (!pubmap) {
+ LOG_ERROR_STATUS (-ENOMEM);
+ goto bail;
+ }
+
+ ptr = pubmap;
+ for (i = 0; i < osb->max_nodes; i++) {
+ if (ocfs_node_map_test_bit(&osb->publ_map, i))
+ ptr += sprintf (ptr, "%d ", i);
+ }
+ if (pubmap != ptr)
+ *(ptr - 1) = '\0';
+
#define PROC_STATS \
+ "Publish map : %s\n" \
"Number of nodes : %u\n" \
"Cluster size : %d\n" \
"Volume size : %llu\n" \
- "Open Transactions: : %u\n"
+ "Open Transactions: : %u\n" \
+ "Delayed Locks : %u\n" \
+ "Checkpointing Locks : %u\n"
- len = sprintf (page, PROC_STATS, osb->num_nodes, osb->s_clustersize,
+ len = sprintf (page, PROC_STATS, pubmap,
+ osb->num_nodes, osb->s_clustersize,
ocfs2_clusters_to_bytes(osb->sb, osb->num_clusters),
- atomic_read(&osb->journal->num_trans));
+ atomic_read(&osb->journal->num_trans),
+ atomic_read(&osb->journal->num_cmt_locks),
+ atomic_read(&osb->journal->num_chkpt_locks));
ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+bail:
+ if (pubmap)
+ kfree(pubmap);
LOG_EXIT_INT (ret);
return ret;
} /* ocfs_proc_statistics */
@@ -425,6 +512,7 @@
int i;
int ret;
ocfs_super *osb;
+ BARF_BARF_BARF *node;
char mount;
LOG_ENTRY ();
@@ -432,10 +520,23 @@
osb = data;
if (osb) {
+ down (&(osb->cfg_lock));
for (i = 0; i < osb->max_nodes; i++) {
- mount = ocfs_node_map_test_bit(osb, &osb->mounted_map, i) ? 'M' : ' ';
- len += sprintf(page + len, "%2d %c\n", i, mount);
+ node = osb->node_cfg_info[i];
+ if (!node)
+ continue;
+ mount = ocfs_node_map_test_bit(&osb->publ_map, i) ? 'M' : ' ';
+ len += sprintf (page + len,
+ "%2d %c %-32s 0x%08u %-6u ",
+ i, mount, node->node_name,
+ ntohl(node->ipc_config.addr_u.ip_addr4),
+ ntohs(node->ipc_config.ip_port));
+ strncat(page + len, node->guid.guid,
+ OCFS2_GUID_LEN);
+ len += OCFS2_GUID_LEN;
+ len += sprintf (page + len, "\n");
}
+ up (&(osb->cfg_lock));
}
ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
@@ -445,6 +546,63 @@
} /* ocfs_proc_nodes */
/*
+ * ocfs_proc_net_vote_obj() - proc read handler registered as "sent-votes" in sub_dir[].
+ * Hands page and &len to ocfs_lookup_vote_request_obj() via a lookup descriptor, then
+ * returns whatever ocfs_proc_calc_metrics() returns for the resulting len. */
+static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = 0, ret;
+ ocfs_super *osb;
+ ocfs_vote_obj_lookup_data d; // 24 bytes
+
+ LOG_ENTRY ();
+
+ osb = data;
+
+ d.func = ocfs_lookup_obj_for_proc;
+ d.ret = NULL;
+ d.u.proc.page = page;
+ d.u.proc.len = &len; /* the callback is presumably expected to advance len -- TODO confirm */
+ d.u.proc.max = 4096; /* presumably the size of the proc page buffer -- TODO confirm */
+ ret = ocfs_lookup_vote_request_obj (osb, &d); /* NOTE(review): this result is overwritten on the next line */
+ ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+ LOG_EXIT_INT (ret);
+ return ret;
+} /* ocfs_proc_net_vote_obj */
+
+/*
+ * ocfs_proc_guid() - proc read handler registered as "guid" in sub_dir[].
+ * Writes MAX_VOL_ID_LENGTH bytes of osb->uuid into page as uppercase hex, then a newline.
+ * Returns whatever ocfs_proc_calc_metrics() returns for strlen(page). */
+static int ocfs_proc_guid (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+ int ret;
+ ocfs_super *osb;
+ char *p;
+ int i;
+
+ LOG_ENTRY ();
+
+ osb = (ocfs_super *) data;
+
+ for (i = 0, p = page; i < MAX_VOL_ID_LENGTH; i++, p += 2) /* two hex digits per uuid byte */
+ sprintf(p, "%02X", osb->uuid[i]);
+ *p = '\n'; ++p; *p = '\0'; /* terminate the hex string with newline + NUL */
+
+ len = strlen (page);
+
+ ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+ LOG_EXIT_INT (ret);
+ return ret;
+} /* ocfs_proc_guid */
+
+
+/*
* ocfs_proc_label()
*
*/
Modified: branches/endian/src/suballoc.c
===================================================================
--- branches/endian/src/suballoc.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/suballoc.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -36,7 +36,7 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlmglue.h"
+#include "dlm.h"
#include "localalloc.h"
#include "util.h"
#include "suballoc.h"
@@ -167,16 +167,16 @@
memset(bg, 0, sb->s_blocksize);
strcpy (bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
- bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
- bg->bg_size = ocfs2_group_bitmap_size(sb);
- bg->bg_bits = (u32) cl->cl_cpg * (u32) cl->cl_bpc;
- bg->bg_chain = my_chain;
- bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
- bg->bg_parent_dinode = OCFS_I(alloc_inode)->ip_blkno;
- bg->bg_blkno = group_blkno;
+ bg->bryce_bg_generation = cpu_to_le32(alloc_inode->i_generation);
+ bg->bryce_bg_size = le16_to_cpu(ocfs2_group_bitmap_size(sb));
+ bg->bryce_bg_bits = (u32) le16_to_cpu(cl->bryce_cl_cpg) * (u32) le16_to_cpu(cl->bryce_cl_bpc);
+ bg->bryce_bg_chain = le16_to_cpu(my_chain);
+ bg->bryce_bg_next_group = le64_to_cpu(cl->cl_recs[my_chain].bryce_c_blkno);
+ bg->bryce_bg_parent_dinode = le64_to_cpu(OCFS_I(alloc_inode)->ip_blkno);
+ bg->bryce_bg_blkno = le64_to_cpu(group_blkno);
/* set the 1st bit in the bitmap to account for the descriptor block */
ocfs2_set_bit(0, (unsigned long *) bg->bg_bitmap);
- bg->bg_free_bits_count = bg->bg_bits - 1;
+ cpu_to_le16(bg->bryce_bg_free_bits_count) = le16_to_cpu(bg->bryce_bg_bits) - 1;
status = ocfs_journal_dirty(handle, bg_bh);
if (status < 0)
@@ -192,8 +192,8 @@
u16 curr, best;
best = curr = 0;
- while (curr < cl->cl_count) {
- if (cl->cl_recs[best].c_total > cl->cl_recs[curr].c_total)
+ while (curr < le16_to_cpu(cl->bryce_cl_count)) {
+ if (le32_to_cpu(cl->cl_recs[best].bryce_c_total) > le32_to_cpu(cl->cl_recs[curr].bryce_c_total))
best = curr;
curr++;
}
@@ -276,7 +276,7 @@
ocfs2_chain_list *cl;
ocfs2_alloc_context *ac = NULL;
ocfs_journal_handle *handle = NULL;
- u32 bit_off, num_bits, tmp;
+ u32 bit_off, num_bits;
u16 alloc_rec;
u64 bg_blkno;
struct buffer_head *bg_bh = NULL;
@@ -299,14 +299,14 @@
cl = &fe->id2.i_chain;
status = ocfs_reserve_clusters(osb,
handle,
- cl->cl_cpg,
+ le16_to_cpu(cl->bryce_cl_cpg),
&ac);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- credits = ocfs_calc_group_alloc_credits(osb->sb, cl->cl_cpg);
+ credits = ocfs_calc_group_alloc_credits(osb->sb, le16_to_cpu(cl->bryce_cl_cpg));
handle = ocfs_start_trans(osb, handle, credits);
if (!handle) {
status = -ENOMEM;
@@ -317,7 +317,7 @@
status = ocfs_claim_clusters(osb,
handle,
ac,
- cl->cl_cpg,
+ le16_to_cpu(cl->bryce_cl_cpg),
&bit_off,
&num_bits);
if (status < 0) {
@@ -366,22 +366,16 @@
goto bail;
}
- cl->cl_recs[alloc_rec].c_free += bg->bg_free_bits_count;
- cl->cl_recs[alloc_rec].c_total += bg->bg_bits;
- cl->cl_recs[alloc_rec].c_blkno = bg_blkno;
- if (cl->cl_next_free_rec < cl->cl_count)
- cl->cl_next_free_rec++;
+ cpu_to_le32(cl->cl_recs[alloc_rec].bryce_c_free) += le16_to_cpu(bg->bryce_bg_free_bits_count);
+ cpu_to_le32(cl->cl_recs[alloc_rec].bryce_c_total) += le16_to_cpu(bg->bryce_bg_bits);
+ cpu_to_le64(cl->cl_recs[alloc_rec].bryce_c_blkno) = bg_blkno;
+ if (le16_to_cpu(cl->bryce_cl_next_free_rec) < le16_to_cpu(cl->bryce_cl_count))
+ cpu_to_le16(cl->bryce_cl_next_free_rec)++;
- tmp = le32_to_cpu(fe->id1.bitmap1.i_used);
- tmp += bg->bg_bits - bg->bg_free_bits_count;
- fe->id1.bitmap1.i_used = cpu_to_le32(tmp);
+ cpu_to_le32(fe->id1.bitmap1.bryce_i_used) += (le16_to_cpu(bg->bryce_bg_bits) - le16_to_cpu(bg->bryce_bg_free_bits_count));
+ cpu_to_le32(fe->id1.bitmap1.bryce_i_total) += le16_to_cpu(bg->bryce_bg_bits);
+ cpu_to_le32(fe->bryce_i_clusters) += le16_to_cpu(cl->bryce_cl_cpg);
- tmp = le32_to_cpu(fe->id1.bitmap1.i_total);
- tmp += bg->bg_bits;
- fe->id1.bitmap1.i_total = cpu_to_le32(tmp);
-
- fe->i_clusters += cl->cl_cpg;
-
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -389,12 +383,12 @@
}
spin_lock(&OCFS_I(alloc_inode)->ip_lock);
- OCFS_I(alloc_inode)->ip_clusters = fe->i_clusters;
- fe->i_size = ocfs2_clusters_to_bytes(alloc_inode->i_sb,
- fe->i_clusters);
+ OCFS_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->bryce_i_clusters);
+ cpu_to_le64(fe->bryce_i_size) = ocfs2_clusters_to_bytes(alloc_inode->i_sb,
+ le32_to_cpu(fe->bryce_i_clusters));
spin_unlock(&OCFS_I(alloc_inode)->ip_lock);
- i_size_write(alloc_inode, fe->i_size);
- alloc_inode->i_blocks = (i_size_read(alloc_inode) + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+ alloc_inode->i_size = le64_to_cpu(fe->bryce_i_size);
+ alloc_inode->i_blocks = (alloc_inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
status = 0;
bail:
@@ -431,19 +425,22 @@
OCFS_ASSERT(!(handle->flags & OCFS_HANDLE_STARTED));
ocfs_handle_add_inode(handle, alloc_inode);
- status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &bh, alloc_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, alloc_inode);
fe = (ocfs2_dinode *) bh->b_data;
OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
- OCFS_ASSERT_RO(fe->i_flags & OCFS2_CHAIN_FL);
+ OCFS_ASSERT_RO(le32_to_cpu(fe->bryce_i_flags) & OCFS2_CHAIN_FL);
- free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
- le32_to_cpu(fe->id1.bitmap1.i_used);
+ free_bits = le32_to_cpu(fe->id1.bitmap1.bryce_i_total) -
+ le32_to_cpu(fe->id1.bitmap1.bryce_i_used);
if (bits_wanted > free_bits) {
/* cluster bitmap never grows */
@@ -461,8 +458,8 @@
}
/* You should never ask for this much metadata */
OCFS_ASSERT(bits_wanted <=
- (le32_to_cpu(fe->id1.bitmap1.i_total)
- - le32_to_cpu(fe->id1.bitmap1.i_used)));
+ (le32_to_cpu(fe->id1.bitmap1.bryce_i_total)
+ - le32_to_cpu(fe->id1.bitmap1.bryce_i_used)));
}
get_bh(bh);
@@ -497,7 +494,7 @@
#ifndef OCFS_USE_ALL_METADATA_SUBALLOCATORS
alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
#else
- alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, osb->slot_num);
+ alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, osb->node_num);
#endif
if (!alloc_inode) {
status = -ENOMEM;
@@ -546,7 +543,7 @@
(*ac)->ac_handle = handle;
(*ac)->ac_which = OCFS_AC_USE_INODE;
- alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, osb->slot_num);
+ alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, osb->node_num);
if (!alloc_inode) {
status = -ENOMEM;
LOG_ERROR_STATUS(status);
@@ -662,7 +659,7 @@
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
/*
@@ -718,9 +715,9 @@
bitmap = bg->bg_bitmap;
while((offset = ocfs2_find_next_zero_bit(bitmap,
- bg->bg_bits,
+ le16_to_cpu(bg->bryce_bg_bits),
start)) != -1) {
- if (offset == bg->bg_bits)
+ if (offset == le16_to_cpu(bg->bryce_bg_bits))
break;
if (!ocfs_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -776,7 +773,7 @@
LOG_ENTRY();
OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
- OCFS_ASSERT(bg->bg_free_bits_count >= num_bits);
+ OCFS_ASSERT(cpu_to_le16(bg->bryce_bg_free_bits_count) >= num_bits);
LOG_TRACE_ARGS("block_group_set_bits: off = %u, num = %u\n", bit_off,
num_bits);
@@ -793,7 +790,7 @@
goto bail;
}
- bg->bg_free_bits_count -= num_bits;
+ cpu_to_le16(bg->bryce_bg_free_bits_count) -= num_bits;
while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap);
@@ -815,16 +812,16 @@
{
u16 curr, best;
- OCFS_ASSERT(cl->cl_next_free_rec);
+ OCFS_ASSERT(le16_to_cpu(cl->bryce_cl_next_free_rec));
best = curr = 0;
- while (curr < cl->cl_next_free_rec) {
- if (cl->cl_recs[curr].c_free > cl->cl_recs[best].c_free)
+ while (curr < le16_to_cpu(cl->bryce_cl_next_free_rec)) {
+ if (le32_to_cpu(cl->cl_recs[curr].bryce_c_free) > le32_to_cpu(cl->cl_recs[best].bryce_c_free))
best = curr;
curr++;
}
- OCFS_ASSERT(best < cl->cl_next_free_rec);
+ OCFS_ASSERT(best < le16_to_cpu(cl->bryce_cl_next_free_rec));
return best;
}
@@ -848,12 +845,12 @@
OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(prev_bg));
LOG_TRACE_ARGS("In suballoc %llu, chain %u, move group %llu to top, "
- "prev = %llu\n", fe->i_blkno, chain, bg->bg_blkno,
- prev_bg->bg_blkno);
+ "prev = %llu\n", le64_to_cpu(fe->bryce_i_blkno), chain, le64_to_cpu(bg->bryce_bg_blkno),
+ le64_to_cpu(prev_bg->bryce_bg_blkno));
- fe_ptr = fe->id2.i_chain.cl_recs[chain].c_blkno;
- bg_ptr = bg->bg_next_group;
- prev_bg_ptr = prev_bg->bg_next_group;
+ fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].bryce_c_blkno);
+ bg_ptr = le64_to_cpu(bg->bryce_bg_next_group);
+ prev_bg_ptr = le64_to_cpu(prev_bg->bryce_bg_next_group);
status = ocfs_journal_access(handle, alloc_inode, prev_bg_bh,
OCFS_JOURNAL_ACCESS_WRITE);
@@ -862,7 +859,7 @@
goto bail;
}
- prev_bg->bg_next_group = bg->bg_next_group;
+ cpu_to_le64(prev_bg->bryce_bg_next_group) = le64_to_cpu(bg->bryce_bg_next_group);
status = ocfs_journal_dirty(handle, prev_bg_bh);
if (status < 0) {
@@ -877,7 +874,7 @@
goto bail;
}
- bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
+ cpu_to_le64(bg->bryce_bg_next_group) = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].bryce_c_blkno);
status = ocfs_journal_dirty(handle, bg_bh);
if (status < 0) {
@@ -892,7 +889,7 @@
goto bail;
}
- fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
+ cpu_to_le64(fe->id2.i_chain.cl_recs[chain].bryce_c_blkno) = le64_to_cpu(bg->bryce_bg_blkno);
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
@@ -903,9 +900,9 @@
status = 0;
bail:
if (status < 0) {
- fe->id2.i_chain.cl_recs[chain].c_blkno = fe_ptr;
- bg->bg_next_group = bg_ptr;
- prev_bg->bg_next_group = prev_bg_ptr;
+ cpu_to_le64(fe->id2.i_chain.cl_recs[chain].bryce_c_blkno) = fe_ptr;
+ cpu_to_le64(bg->bryce_bg_next_group) = bg_ptr;
+ cpu_to_le64(prev_bg->bryce_bg_next_group) = prev_bg_ptr;
}
LOG_EXIT_STATUS(status);
@@ -915,7 +912,7 @@
static inline int ocfs_block_group_reasonably_empty(ocfs2_group_desc *bg,
u32 wanted)
{ /* returns nonzero when the group's free-bit count is strictly greater than 'wanted' */
- return bg->bg_free_bits_count > wanted;
+ return le16_to_cpu(bg->bryce_bg_free_bits_count) > wanted; /* field is read through le16_to_cpu() before the comparison */
}
/* return 0 on success, -ENOSPC to keep searching and any other < 0
@@ -932,7 +929,7 @@
OCFS_ASSERT(ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (le16_to_cpu(bg->bryce_bg_free_bits_count)) {
ret = ocfs_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
&tmp_off, &tmp_found);
@@ -959,7 +956,7 @@
OCFS_ASSERT(min_bits == 1);
OCFS_ASSERT(!ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count)
+ if (le16_to_cpu(bg->bryce_bg_free_bits_count))
ret = ocfs_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
bit_off, bits_found);
@@ -976,7 +973,6 @@
{
int status;
u16 chain, tmp_bits;
- u32 tmp_used;
u64 next_group;
ocfs_journal_handle *handle = ac->ac_handle;
struct inode *alloc_inode = ac->ac_inode;
@@ -991,7 +987,7 @@
bits_wanted, chain, OCFS_I(alloc_inode)->ip_blkno);
status = ocfs_read_block(OCFS2_SB(alloc_inode->i_sb),
- cl->cl_recs[chain].c_blkno, &group_bh,
+ le64_to_cpu(cl->cl_recs[chain].bryce_c_blkno), &group_bh,
OCFS_BH_CACHED, alloc_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1006,14 +1002,14 @@
while ((status = ac->ac_group_search(alloc_inode, group_bh,
bits_wanted, min_bits, bit_off,
&tmp_bits)) == -ENOSPC) {
- if (!bg->bg_next_group)
+ if (!le64_to_cpu(bg->bryce_bg_next_group))
break;
if (prev_group_bh) {
brelse(prev_group_bh);
prev_group_bh = NULL;
}
- next_group = bg->bg_next_group;
+ next_group = le64_to_cpu(bg->bryce_bg_next_group);
prev_group_bh = group_bh;
group_bh = NULL;
status = ocfs_read_block(OCFS2_SB(alloc_inode->i_sb),
@@ -1033,7 +1029,7 @@
}
LOG_TRACE_ARGS("alloc succeeds: we give %u bits from block group "
- "%llu\n", tmp_bits, bg->bg_blkno);
+ "%llu\n", tmp_bits, le64_to_cpu(bg->bryce_bg_blkno));
*num_bits = tmp_bits;
@@ -1075,9 +1071,8 @@
goto bail;
}
- tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
- fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
- cl->cl_recs[chain].c_free -= *num_bits;
+ cpu_to_le32(fe->id1.bitmap1.bryce_i_used) += *num_bits;
+ cpu_to_le32(cl->cl_recs[chain].bryce_c_free) -= *num_bits;
status = ocfs_journal_dirty(handle,
ac->ac_bh);
@@ -1098,9 +1093,9 @@
}
LOG_TRACE_ARGS("Allocated %u bits from suballocator %llu\n",
- *num_bits, fe->i_blkno);
+ *num_bits, le64_to_cpu(fe->bryce_i_blkno));
- *bg_blkno = bg->bg_blkno;
+ *bg_blkno = le64_to_cpu(bg->bryce_bg_blkno);
bail:
if (group_bh)
brelse(group_bh);
@@ -1133,8 +1128,7 @@
fe = (ocfs2_dinode *) ac->ac_bh->b_data;
OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
- OCFS_ASSERT_RO(le32_to_cpu(fe->id1.bitmap1.i_used) <
- le32_to_cpu(fe->id1.bitmap1.i_total));
+ OCFS_ASSERT_RO(le32_to_cpu(fe->id1.bitmap1.bryce_i_used) < le32_to_cpu(fe->id1.bitmap1.bryce_i_total));
cl = (ocfs2_chain_list *) &fe->id2.i_chain;
@@ -1159,10 +1153,10 @@
* because we only calculate enough journal credits for one
* relink per alloc. */
ac->ac_allow_chain_relink = 0;
- for (i = 0; i < cl->cl_next_free_rec; i ++) {
+ for (i = 0; i < le16_to_cpu(cl->bryce_cl_next_free_rec); i ++) {
if (i == victim)
continue;
- if (!cl->cl_recs[i].c_free)
+ if (!le32_to_cpu(cl->cl_recs[i].bryce_c_free))
continue;
ac->ac_chain = i;
@@ -1387,7 +1381,7 @@
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
}
static inline int ocfs_block_group_clear_bits(ocfs_journal_handle *handle,
@@ -1429,7 +1423,7 @@
ocfs2_set_bit(bit_off + tmp,
(unsigned long *) undo_bg->bg_bitmap);
}
- bg->bg_free_bits_count += num_bits;
+ cpu_to_le16(bg->bryce_bg_free_bits_count) += num_bits;
status = ocfs_journal_dirty(handle, group_bh);
if (status < 0)
@@ -1449,7 +1443,6 @@
unsigned int count)
{
int status = 0;
- u32 tmp_used;
ocfs_super *osb = OCFS2_SB(alloc_inode->i_sb);
ocfs2_dinode *fe = (ocfs2_dinode *) alloc_bh->b_data;
ocfs2_chain_list *cl = &fe->id2.i_chain;
@@ -1460,7 +1453,7 @@
OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
OCFS_ASSERT((count + start_bit)
- <= ((u32) cl->cl_cpg * (u32) cl->cl_bpc));
+ <= ((u32) le16_to_cpu(cl->bryce_cl_cpg) * (u32) le16_to_cpu(cl->bryce_cl_bpc)));
LOG_TRACE_ARGS("suballocator %llu: freeing %u bits from group %llu, "
"starting at %u\n",
@@ -1476,7 +1469,7 @@
group = (ocfs2_group_desc *) group_bh->b_data;
OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(group));
- OCFS_ASSERT((count + start_bit) <= group->bg_bits);
+ OCFS_ASSERT((count + start_bit) <= le16_to_cpu(group->bryce_bg_bits));
status = ocfs_block_group_clear_bits(handle, alloc_inode,
group, group_bh,
@@ -1493,9 +1486,8 @@
goto bail;
}
- cl->cl_recs[group->bg_chain].c_free += count;
- tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
- fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
+ cpu_to_le32(cl->cl_recs[le16_to_cpu(group->bryce_bg_chain)].bryce_c_free) += count;
+ cpu_to_le32(fe->id1.bitmap1.bryce_i_used) -= count;
status = ocfs_journal_dirty(handle, alloc_bh);
if (status < 0) {
@@ -1523,8 +1515,8 @@
struct buffer_head *inode_alloc_bh,
ocfs2_dinode *di)
{
- u64 blk = di->i_blkno;
- u16 bit = le16_to_cpu(di->i_suballoc_bit);
+ u64 blk = le64_to_cpu(di->bryce_i_blkno);
+ u16 bit = le16_to_cpu(di->bryce_i_suballoc_bit);
u64 bg_blkno = ocfs_which_suballoc_group(blk, bit);
return ocfs_free_suballoc_bits(handle, inode_alloc_inode,
@@ -1536,8 +1528,8 @@
struct buffer_head *eb_alloc_bh,
ocfs2_extent_block *eb)
{
- u64 blk = le64_to_cpu(eb->h_blkno);
- u16 bit = le16_to_cpu(eb->h_suballoc_bit);
+ u64 blk = le64_to_cpu(eb->bryce_h_blkno);
+ u16 bit = le16_to_cpu(eb->bryce_h_suballoc_bit);
u64 bg_blkno = ocfs_which_suballoc_group(blk, bit);
return ocfs_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
@@ -1585,41 +1577,39 @@
{
printk("Block Group:\n");
printk("bg_signature: %s\n", bg->bg_signature);
- printk("bg_size: %u\n", bg->bg_size);
- printk("bg_bits: %u\n", bg->bg_bits);
- printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
- printk("bg_chain: %u\n", bg->bg_chain);
- printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
- printk("bg_next_group: %llu\n", bg->bg_next_group);
- printk("bg_parent_dinode: %llu\n", bg->bg_parent_dinode);
- printk("bg_blkno: %llu\n", bg->bg_blkno);
+ printk("bg_size: %u\n", le16_to_cpu(bg->bryce_bg_size));
+ printk("bg_bits: %u\n", le16_to_cpu(bg->bryce_bg_bits));
+ printk("bg_free_bits_count: %u\n", le16_to_cpu(bg->bryce_bg_free_bits_count));
+ printk("bg_chain: %u\n", le16_to_cpu(bg->bryce_bg_chain));
+ printk("bg_generation: %u\n", le32_to_cpu(bg->bryce_bg_generation));
+ printk("bg_next_group: %llu\n", le64_to_cpu(bg->bryce_bg_next_group));
+ printk("bg_parent_dinode: %llu\n", le64_to_cpu(bg->bryce_bg_parent_dinode));
+ printk("bg_blkno: %llu\n", le64_to_cpu(bg->bryce_bg_blkno));
}
static inline void debug_suballoc_inode(ocfs2_dinode *fe)
{ /* debug helper: printk()s the inode's header fields, bitmap1 counters and chain list; returns nothing */
int i;
- printk("Suballoc Inode %llu:\n", fe->i_blkno);
+ printk("Suballoc Inode %llu:\n", le64_to_cpu(fe->bryce_i_blkno)); /* the added lines pass each multi-byte field through leNN_to_cpu() before printing */
printk("i_signature: %s\n", fe->i_signature);
- printk("i_size: %llu\n", fe->i_size);
- printk("i_clusters: %u\n", fe->i_clusters);
- printk("i_generation: %u\n",
- le32_to_cpu(fe->i_generation));
- printk("id1.bitmap1.i_used: %u\n",
- le32_to_cpu(fe->id1.bitmap1.i_used));
- printk("id1.bitmap1.i_total: %u\n",
- le32_to_cpu(fe->id1.bitmap1.i_total));
- printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
- printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
- printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
+
+ printk("i_size: %llu\n", le64_to_cpu(fe->bryce_i_size));
+ printk("i_clusters: %u\n", le32_to_cpu(fe->bryce_i_clusters));
+ printk("i_generation: %u\n", le32_to_cpu(fe->bryce_i_generation));
+ printk("id1.bitmap1.i_used: %u\n", le32_to_cpu(fe->id1.bitmap1.bryce_i_used));
+ printk("id1.bitmap1.i_total: %u\n", le32_to_cpu(fe->id1.bitmap1.bryce_i_total));
+ printk("id2.i_chain.cl_cpg: %u\n", le16_to_cpu(fe->id2.i_chain.bryce_cl_cpg));
+ printk("id2.i_chain.cl_bpc: %u\n", le16_to_cpu(fe->id2.i_chain.bryce_cl_bpc));
+ printk("id2.i_chain.cl_count: %u\n", le16_to_cpu(fe->id2.i_chain.bryce_cl_count));
printk("id2.i_chain.cl_next_free_rec: %u\n",
- fe->id2.i_chain.cl_next_free_rec);
- for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
+ le16_to_cpu(fe->id2.i_chain.bryce_cl_next_free_rec));
+ for(i = 0; i < le16_to_cpu(fe->id2.i_chain.bryce_cl_next_free_rec); i++) { /* prints c_free, c_total and c_blkno for each record index below cl_next_free_rec */
printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
- fe->id2.i_chain.cl_recs[i].c_free);
+ le32_to_cpu(fe->id2.i_chain.cl_recs[i].bryce_c_free));
printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
- fe->id2.i_chain.cl_recs[i].c_total);
+ le32_to_cpu(fe->id2.i_chain.cl_recs[i].bryce_c_total));
printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
- fe->id2.i_chain.cl_recs[i].c_blkno);
+ le64_to_cpu(fe->id2.i_chain.cl_recs[i].bryce_c_blkno));
}
}
Modified: branches/endian/src/suballoc.h
===================================================================
--- branches/endian/src/suballoc.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/suballoc.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -54,7 +54,7 @@
void ocfs_free_alloc_context(ocfs2_alloc_context *ac);
static inline int ocfs_alloc_context_bits_left(ocfs2_alloc_context *ac)
{ /* returns ac_bits_wanted minus ac_bits_given for the given allocation context */
- return ac->ac_bits_wanted - ac->ac_bits_given;
+ return(ac->ac_bits_wanted - ac->ac_bits_given); /* same expression as the removed line; only parentheses were added */
}
int ocfs_reserve_new_metadata(ocfs_super *osb,
Modified: branches/endian/src/super.c
===================================================================
--- branches/endian/src/super.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/super.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -42,10 +42,6 @@
#include <linux/socket.h>
#include <linux/inet.h>
-#include <cluster/util.h>
-#include <cluster/dlmcommon.h>
-#include <cluster/nodemanager.h>
-
#include "ocfs_log.h"
#include "ocfs.h"
#include "ocfs2.h"
@@ -54,18 +50,18 @@
#include "ocfs1_fs_compat.h"
#include "alloc.h"
-#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
+#include "nm.h"
#include "proc.h"
-#include "slot_map.h"
#include "super.h"
#include "sysfile.h"
#include "util.h"
#include "ver.h"
+#include "volcfg.h"
#include "vote.h"
#include "ocfs_journal.h"
@@ -78,14 +74,23 @@
*/
ocfs_global_ctxt OcfsGlobalCtxt;
spinlock_t osb_id_lock;
-u32 osb_id; /* Keeps track of next available OSB Id */
+__u32 osb_id; /* Keeps track of next available OSB Id */
+spinlock_t mount_cnt_lock;
+__u32 mount_cnt; /* Number of volumes currently mounted */
-u32 debug_context = 0;
-u32 debug_level = 0;
-u32 debug_exclude = 0;
+char *node_name = NULL;
+__s32 node_number = OCFS_INVALID_NODE_NUM;
+__u32 debug_context = 0;
+__u32 debug_level = 0;
+__u32 debug_exclude = 0;
+char *ip_address = NULL;
+__u32 ip_port_v2 = 0;
+char *guid = NULL;
+__u32 cs = 0;
+char *ocfs_hostname;
#ifdef EVIL_TRACE
-u64 debug_mask = 0;
+__u64 debug_mask = 0;
#endif
#if 0
@@ -97,11 +102,11 @@
#define KERN_OCFS 988
static ctl_table ocfs_dbg_table[] = {
- {1, "debug_level", &debug_level, sizeof (u32), 0644, NULL, &proc_dointvec,
+ {1, "debug_level", &debug_level, sizeof (__u32), 0644, NULL, &proc_dointvec,
&sysctl_intvec, NULL, NULL, NULL},
- {2, "debug_context", &debug_context, sizeof (u32), 0644, NULL, &proc_dointvec,
+ {2, "debug_context", &debug_context, sizeof (__u32), 0644, NULL, &proc_dointvec,
&sysctl_intvec, NULL, NULL, NULL},
- {3, "debug_exclude", &debug_exclude, sizeof (u32), 0644, NULL, &proc_dointvec,
+ {3, "debug_exclude", &debug_exclude, sizeof (__u32), 0644, NULL, &proc_dointvec,
&sysctl_intvec, NULL, NULL, NULL},
{0}
};
@@ -123,26 +128,45 @@
//MODULE_DESCRIPTION("Oracle Clustered FileSystem");
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+module_param (node_name, charp, 0);
+module_param (node_number, int, 0);
module_param (debug_context, uint, 0);
module_param (debug_level, uint, 0);
module_param (debug_exclude, uint, 0);
+module_param (ip_address, charp, 0);
+module_param (ip_port_v2, uint, 0);
+module_param (guid, charp, 0);
+module_param (cs, uint, 0);
#else /* 2.6.x kernel */
+MODULE_PARM (node_name, "s");
+MODULE_PARM_DESC(node_name, "Name of this machine in the cluster");
+MODULE_PARM (node_number, "i");
+MODULE_PARM_DESC(node_number, "Slot number for this machine within volume");
MODULE_PARM (debug_context, "i");
MODULE_PARM_DESC(debug_context, "Debug context");
MODULE_PARM (debug_level, "i");
MODULE_PARM_DESC(debug_level, "Debug level");
MODULE_PARM (debug_exclude, "i");
MODULE_PARM_DESC(debug_exclude, "Process ID to exclude from tracing");
+MODULE_PARM (ip_address, "s");
+MODULE_PARM_DESC(ip_address, "IP address for the network dlm on this node");
+MODULE_PARM (ip_port_v2, "i");
+MODULE_PARM_DESC(ip_port_v2, "Port number for the network dlm on this node");
+MODULE_PARM (guid, "s");
+MODULE_PARM_DESC(guid, "GUID for this machine");
+MODULE_PARM (cs, "i");
+MODULE_PARM_DESC(cs, "Checksum");
#endif /* Linux 2.4 stuff */
+extern struct semaphore recovery_list_sem;
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name);
+static int ocfs_parse_options (char *options, __u32 * uid, __u32 * gid, int * reclaim_id);
static int __init ocfs_driver_entry (void);
static void __exit ocfs_driver_exit (void);
static void ocfs_put_super (struct super_block *sb);
-static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
- char **group_name, struct inode *root);
-static void ocfs_dismount_volume(struct super_block *sb);
+static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root);
+static int ocfs_dismount_volume (struct super_block *sb);
+static int ocfs_read_params(void);
static int ocfs_initialize_mem_lists (void);
static void ocfs_free_mem_lists (void);
static void ocfs_delete_osb (ocfs_super * osb);
@@ -158,11 +182,13 @@
static int ocfs_init_global_system_inodes(ocfs_super *osb);
static int ocfs_init_local_system_inodes(ocfs_super *osb);
static int ocfs_release_system_inodes(ocfs_super *osb);
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name);
-static int ocfs2_complete_mount_recovery(ocfs_super *osb);
+static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num);
+static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num);
+static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num,
+ int value);
static int ocfs_check_volume(ocfs_super * osb);
static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
- u32 sectsize);
+ __u32 sectsize);
static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh);
static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size);
static void ocfs_write_super (struct super_block * sb);
@@ -243,7 +269,7 @@
for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
- new = ocfs_get_system_file_inode(osb, i, osb->slot_num);
+ new = ocfs_get_system_file_inode(osb, i, osb->node_num);
if (!new) {
ocfs_release_system_inodes(osb);
LOG_ERROR_STATUS(status = -EINVAL);
@@ -257,7 +283,7 @@
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_init_global_system_inodes */
/*
@@ -272,7 +298,7 @@
LOG_ENTRY();
for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES ; i++) {
- new = ocfs_get_system_file_inode(osb, i, osb->slot_num);
+ new = ocfs_get_system_file_inode(osb, i, osb->node_num);
if (!new) {
ocfs_release_system_inodes(osb);
LOG_ERROR_STATUS(status = -EINVAL);
@@ -284,7 +310,7 @@
bail:
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_init_local_system_inodes */
/*
@@ -312,7 +338,7 @@
}
LOG_EXIT_STATUS(status);
- return status;
+ return(status);
} /* ocfs_release_system_inodes */
static int ocfs_fill_super(struct super_block *sb, void *data, int silent)
@@ -320,15 +346,14 @@
struct dentry *root;
int status;
struct inode *inode = NULL;
- u32 uid = current->fsuid;
- u32 gid = current->fsgid;
+ __u32 uid = current->fsuid;
+ __u32 gid = current->fsgid;
int reclaim_id;
- char *group_name = NULL;
ocfs_super *osb = NULL;
LOG_ENTRY_ARGS ("%p, %p, %i", sb, data, silent);
- if (ocfs_parse_options (data, &uid, &gid, &reclaim_id, &group_name) != 0) {
+ if (ocfs_parse_options (data, &uid, &gid, &reclaim_id) != 0) {
status = -EINVAL;
LOG_ERROR_STR ("ocfs_read_super: bad mount option");
goto read_super_error;
@@ -341,7 +366,7 @@
/* this is needed to support O_LARGE_FILE */
sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE;
- status = ocfs_mount_volume (sb, reclaim_id, &group_name, NULL);
+ status = ocfs_mount_volume (sb, reclaim_id, NULL);
if (status < 0)
goto read_super_error;
@@ -369,23 +394,11 @@
sb->s_root = root;
- printk ("ocfs2: Mounting device (%u,%u) on %s (node %d, slot %d)\n",
+ printk ("ocfs2: Mounting device (%u,%u) on %s (node %d)\n",
MAJOR(sb->s_dev), MINOR(sb->s_dev),
- OcfsGlobalCtxt.node_name, osb->node_num, osb->slot_num);
+ osb->node_cfg_info[osb->node_num]->node_name, osb->node_num);
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
-
- if (osb->dirty) {
- /* This must happen *after* setting the volume to
- * MOUNTED as we may sleep on any recovery threads. */
- status = ocfs2_complete_mount_recovery(osb);
- if (status < 0)
- LOG_EXIT_STATUS(status);
- }
-
- if (group_name)
- kfree(group_name);
-
LOG_EXIT_STATUS(status);
return status;
@@ -398,9 +411,6 @@
if (inode)
iput (inode);
- if (group_name)
- kfree(group_name);
-
LOG_EXIT_STATUS(status);
return status;
}
@@ -437,12 +447,11 @@
*
* e.g., gid=9999,uid=9999,[no]cache,reclaimid
*/
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name)
+static int ocfs_parse_options (char *options, __u32 * uid, __u32 * gid, int * reclaim_id)
{
char *c;
char *value;
int ret = 1;
- int size;
LOG_ENTRY ();
@@ -483,24 +492,6 @@
}
} else if (!strcmp (c, "reclaimid")) {
*reclaim_id = 1;
- } else if (!strcmp(c, "group")) {
- if (!value || !*value) {
- LOG_ERROR_STR
- ("group option requires an argument");
- goto bail;
- }
- LOG_TRACE_ARGS("group name passed = %s\n", value);
-
- size = strlen(value) + 1;
- *group_name = kmalloc(size, GFP_KERNEL);
- if (!(*group_name)) {
- LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
- }
- memset(*group_name, 0, size);
- printk("ocfs2: group name passed = %s, size = %d\n",
- value, size);
- strcpy(*group_name, value);
} else {
LOG_ERROR_ARGS ("Invalid mount option: %s", c);
goto bail;
@@ -528,7 +519,16 @@
if (init_ocfs2_extent_maps())
return -ENOMEM;
+
+ ocfs_hostname = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
+ if (ocfs_hostname == NULL) {
+ status = -EINVAL;
+ goto leave;
+ }
+ strcpy(ocfs_hostname, system_utsname.nodename);
+ printk("ocfs2: hostname is %s\n", ocfs_hostname);
+
ocfs_table_header = register_sysctl_table(ocfs_root_table, 0);
if (!ocfs_table_header) {
LOG_ERROR_STATUS(status = -ENOMEM);
@@ -536,28 +536,18 @@
}
memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt));
+ memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt));
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
- /* Ok, just use utsname for now. Eventually we need to
- * get this from the node config subsystem. */
- OcfsGlobalCtxt.node_name = kmalloc(OCFS2_MAX_NODE_NAME_LENGTH,
- GFP_KERNEL);
- if (!OcfsGlobalCtxt.node_name) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- memset(OcfsGlobalCtxt.node_name, 0, OCFS2_MAX_NODE_NAME_LENGTH);
- strncpy(OcfsGlobalCtxt.node_name, system_utsname.nodename,
- OCFS2_MAX_NODE_NAME_LENGTH - 1);
+ /* Read remaining insmod params */
+ if ((status = ocfs_read_params ()) < 0)
+ goto leave;
- printk("ocfs2: node name is %s\n", OcfsGlobalCtxt.node_name);
-
/* Initialize the global data resource */
init_MUTEX (&(OcfsGlobalCtxt.global_res));
- OcfsGlobalCtxt.flags |= OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED;
+ OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED);
/* Initialize the memory slabs for oin and file entry */
status = ocfs_initialize_mem_lists ();
@@ -570,7 +560,18 @@
spin_lock (&osb_id_lock);
osb_id = 0;
spin_unlock (&osb_id_lock);
+
+ spin_lock_init (&mount_cnt_lock);
+ spin_lock (&mount_cnt_lock);
+ mount_cnt = 0;
+ spin_unlock (&mount_cnt_lock);
+
+ spin_lock_init (&OcfsGlobalCtxt.comm_seq_lock);
+ spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
+ OcfsGlobalCtxt.comm_seq_num = 0;
+ spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
+ init_MUTEX (&recovery_list_sem);
/* Initialize the proc interface */
ocfs_proc_init ();
@@ -582,7 +583,8 @@
/* Delete the global context resource */
if (OcfsGlobalCtxt.flags & OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED)
- OcfsGlobalCtxt.flags &= ~OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED;
+ OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags,
+ OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED);
if (ocfs_table_header)
unregister_sysctl_table(ocfs_table_header);
@@ -599,6 +601,79 @@
} /* ocfs_driver_entry */
/*
+ * ocfs_read_params()
+ *
+ * Read insmod params
+ *
+ * Checks the module parameters node_number, ip_port_v2, node_name,
+ * ip_address, guid and cs, and stores the accepted values in
+ * OcfsGlobalCtxt.
+ *
+ * Returns 0 when every check passes, -EINVAL otherwise. A failed check
+ * does not stop the function: the remaining parameters are still
+ * examined (and logged), and only the guid checksum comparison is
+ * skipped once status is non-zero.
+ */
+static int ocfs_read_params(void)
+{
+ int status = 0;
+ __u32 check_sum = 0;
+ int i;
+
+ /* Read remaining insmod params */
+ if (node_number != OCFS_INVALID_NODE_NUM) {
+ // this will be validated later
+ OcfsGlobalCtxt.pref_node_num = node_number;
+ LOG_TRACE_ARGS("Preferred node number: %d\n", node_number);
+ }
+
+ if (ip_port_v2 == 0) /* 0 selects OCFS_IPC_DEFAULT_PORT */
+ OcfsGlobalCtxt.comm_info.ip_port =
+ htons(OCFS_IPC_DEFAULT_PORT);
+ else if (ip_port_v2 & 0xFFFF0000) { /* a bit above the low 16 is set, so the value does not fit in u16 */
+ status = -EINVAL;
+ LOG_ERROR_STR("'ip_port_v2' is too large'"); /* NOTE(review): message text ends with a stray ' */
+ }
+ else
+ OcfsGlobalCtxt.comm_info.ip_port =
+ htons((u16)ip_port_v2);
+ LOG_TRACE_ARGS("IP port: %d\n", /* not part of the else above: runs on every path, including the error path */
+ ntohs(OcfsGlobalCtxt.comm_info.ip_port));
+
+ if (node_name && strlen(node_name) < MAX_NODE_NAME_LENGTH) {
+ OcfsGlobalCtxt.node_name = node_name; /* stores the parameter pointer itself; the string is not copied */
+ LOG_TRACE_ARGS ("Node name: %s\n", OcfsGlobalCtxt.node_name);
+ } else {
+ status = -EINVAL;
+ LOG_ERROR_STR ("'node_name' not set or too long");
+ }
+
+#define MAX_IPv4_ADDR_STR_LEN 15 /* 4x '255' + 3x '.' */
+ if (ip_address && strlen (ip_address) <= MAX_IPv4_ADDR_STR_LEN) { /* only presence and length are checked here */
+ OcfsGlobalCtxt.comm_info.addr_u.ip_addr4 =
+ in_aton(ip_address);
+ LOG_TRACE_ARGS ("IP address: %s\n", ip_address);
+ } else {
+ status = -EINVAL;
+ LOG_ERROR_STR ("'ip_address' not set or too long");
+ }
+
+ if (guid && strlen (guid) == OCFS2_GUID_LEN) { /* length must match exactly */
+ memcpy(&OcfsGlobalCtxt.guid.guid, guid, OCFS2_GUID_LEN);
+ LOG_TRACE_ARGS ("Node guid: %s\n", guid);
+ } else {
+ status = -EINVAL;
+ LOG_ERROR_STR ("'guid' not set correctly");
+ }
+
+ if (status == 0) { /* status is 0 only if the guid check above passed, so guid is non-NULL with OCFS2_GUID_LEN chars */
+ for (i = 0; i < OCFS2_GUID_LEN; ++i)
+ check_sum += (__u32) guid[i]; /* running sum of the guid's characters */
+ if (cs != check_sum) { /* the 'cs' parameter must equal that sum */
+ status = -EINVAL;
+ LOG_ERROR_STR ("load module using load_ocfs2");
+ }
+ }
+
+ /* hardcoding... not used yet */
+ OcfsGlobalCtxt.comm_info.ip_version = htons(4);
+
+ return status;
+} /* ocfs_read_params */
+
+
+/*
* ocfs_driver_exit()
*
* Called on rmmod
@@ -613,6 +688,7 @@
/* Signal DLM thread to exit */
down (&(OcfsGlobalCtxt.global_res));
+ OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD);
if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
ocfs_free_mem_lists ();
@@ -626,11 +702,9 @@
exit_ocfs2_extent_maps();
- if (OcfsGlobalCtxt.node_name)
- kfree(OcfsGlobalCtxt.node_name);
-
printk("Unloaded OCFS Driver module\n");
LOG_EXIT ();
+ return;
} /* ocfs_driver_exit */
@@ -651,6 +725,7 @@
LOG_EXIT ();
LOG_CLEAR_CONTEXT();
+ return;
} /* ocfs_put_super */
@@ -662,8 +737,8 @@
static int ocfs_statfs(struct super_block *sb, struct kstatfs *buf)
{
ocfs_super *osb;
- u32 numbits, freebits;
- int status;
+ __u32 numbits, freebits;
+ int status, ret = 0;
ocfs2_dinode *bm_lock;
struct buffer_head *bh = NULL;
struct inode *inode = NULL;
@@ -679,21 +754,22 @@
-1);
if (!inode) {
LOG_ERROR_STR("failed to get bitmap inode");
- status = -EIO;
+ ret = -EIO;
goto bail;
}
- status = ocfs2_meta_lock(inode, NULL, &bh, 0);
+ status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &bh,
+ OCFS_BH_CACHED, inode);
if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
+ LOG_ERROR_STR("failed to read bitmap data");
+ ret = -EIO;
goto bail;
}
bm_lock = (ocfs2_dinode *) bh->b_data;
- numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total);
- freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
+ numbits = le32_to_cpu(bm_lock->id1.bitmap1.bryce_i_total);
+ freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.bryce_i_used);
buf->f_type = OCFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
@@ -710,16 +786,15 @@
brelse(bh);
- ocfs2_meta_unlock(inode, 0);
- status = 0;
+
bail:
if (inode)
iput (inode);
- LOG_EXIT_INT (status);
+ LOG_EXIT_INT (ret);
LOG_CLEAR_CONTEXT();
- return status;
+ return ret;
} /* ocfs_statfs */
@@ -737,7 +812,7 @@
sizeof(ocfs_journal_lock), 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
NULL, NULL);
- OcfsGlobalCtxt.flags |= OCFS_FLAG_MEM_LISTS_INITIALIZED;
+ OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
return 0;
} /* ocfs_initialize_mem_lists */
@@ -750,7 +825,7 @@
{
kmem_cache_destroy (OcfsGlobalCtxt.inode_cache);
kmem_cache_destroy (OcfsGlobalCtxt.lock_cache);
- OcfsGlobalCtxt.flags &= ~OCFS_FLAG_MEM_LISTS_INITIALIZED;
+ OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
} /* ocfs_free_mem_lists */
static int ocfs2_sb_probe(struct super_block *sb,
@@ -851,58 +926,18 @@
return 0;
}
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name)
-{
- int status, i;
- struct inode *group = NULL;
- char *p;
- if (group_name) {
- osb->group_name = *group_name;
- *group_name = NULL;
- } else {
- osb->group_name = kmalloc(NM_MAX_NAME_LEN + 1, GFP_KERNEL);
- if (!osb->group_name) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- memset(osb->group_name, 0, NM_MAX_NAME_LEN + 1);
- for (i = 0, p = osb->uuid; i < MAX_VOL_ID_LENGTH; i++, p += 2)
- sprintf(p, "%02X", osb->uuid[i]);
- }
-
- group = nm_get_group_by_name(osb->group_name);
- if (!group) {
- printk("ocfs2: could not join group \"%s\"\n",
- osb->group_name);
- status = -EINVAL;
- goto bail;
- }
-
- osb->group_inode = group;
- osb->node_num = nm_this_node(group);
-
- printk("ocfs2: I am node %d, a member of group %s\n", osb->node_num,
- osb->group_name);
-
- status = 0;
-bail:
-
- return status;
-}
-
/*
* ocfs_mount_volume()
*
*/
-static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
- char **group_name, struct inode *root)
+static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root)
{
- int status, sector_size;
- int unlock_super = 0;
- ocfs_super *osb = NULL;
+ int status;
+ ocfs_super *osb;
+ int child_pid;
struct buffer_head *bh = NULL;
+ int sector_size;
LOG_ENTRY ();
@@ -913,7 +948,7 @@
goto leave;
}
- if ((osb = kmalloc (sizeof(ocfs_super), GFP_KERNEL)) == NULL) {
+ if ((osb = ocfs_malloc (sizeof (ocfs_super))) == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto leave;
}
@@ -937,79 +972,97 @@
goto leave;
}
- status = ocfs2_fill_local_node_info(osb, group_name);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ down(&(osb->osb_res));
- status = ocfs2_register_hb_callbacks(osb);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
+ /* Launch the NM thread for the mounted volume */
+ osb->dlm_task = NULL;
+ child_pid = kernel_thread (ocfs_heartbeat_thread, osb,
+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ if (child_pid < 0) {
+ LOG_ERROR_ARGS ("unable to launch ocfs2nm thread, error=%d",
+ child_pid);
+ up (&(osb->osb_res));
+ status = child_pid;
goto leave;
+ } else {
+ init_completion (&osb->dlm_complete);
}
- status = ocfs2_dlm_init(osb);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ up (&(osb->osb_res));
- /* requires vote_thread to be running. */
- status = ocfs2_register_net_handlers(osb);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ /* Add proc entry for this volume */
+ ocfs_proc_add_volume (osb);
- status = ocfs2_super_lock(osb, 1);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ /* GlobalMountCount */
+ spin_lock (&mount_cnt_lock);
+ mount_cnt++;
+ if (mount_cnt == 1) {
+ OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE;
+ OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION;
+ /* start the listener thread */
+ status = ocfs_init_udp_sock(&OcfsIpcCtxt.send_sock,
+ &OcfsIpcCtxt.recv_sock);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+ OcfsIpcCtxt.task = NULL;
+ child_pid = kernel_thread (ocfs_recv_thread, NULL,
+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ if (child_pid >= 0) {
+ init_completion (&(OcfsIpcCtxt.complete));
+ } else {
+ status = child_pid;
+ LOG_ERROR_ARGS ("unable to launch ocfs2lsnr thread, error=%d", child_pid);
+ goto leave;
+ }
}
- unlock_super = 1;
+ spin_unlock (&mount_cnt_lock);
- /* This will load up the node map and add ourselves to it. */
- status = ocfs2_find_slot(osb);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ /* wait for nm thread to be init */
+ ocfs_wait (osb->nm_init_event, (atomic_read (&osb->nm_init) >= OCFS_HEARTBEAT_INIT ), 0);
- ocfs2_populate_mounted_map(osb);
+ down(&(osb->osb_res));
+ down (&(osb->publish_lock));
+ ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 1);
+ up (&(osb->publish_lock));
- /* load all node-local system inodes */
- status = ocfs_init_local_system_inodes(osb);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
+ ocfs_node_map_set_bit(&osb->publ_map, osb->node_num);
+ up (&(osb->osb_res));
- /* Add proc entry for this volume */
- ocfs_proc_add_volume (osb);
-
/* Read the publish sector for this node and cleanup dirent being */
/* modified when we crashed. */
LOG_TRACE_STR ("ocfs_check_volume...");
+ down(&(osb->osb_res));
status = ocfs_check_volume (osb);
if (status < 0) {
+ up(&(osb->osb_res));
LOG_ERROR_STATUS (status);
goto leave;
}
- /* This should be sent *after* we recovered our journal as it
- * will cause other nodes to unmark us as needing
- * recovery. However, we need to send it *before* dropping the
- * super block lock as otherwise their recovery threads might
- * try to clean us up while we're live! */
- status = ocfs2_request_mount_vote(osb);
- if (status < 0)
- LOG_ERROR_STATUS (status);
+ /* Launch the commit thread */
+ osb->commit = ocfs_malloc(sizeof(ocfs_commit_task));
+ if (osb->commit == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ up (&(osb->osb_res));
+ goto leave;
+ }
+ memset(osb->commit, 0, sizeof(ocfs_commit_task));
+ child_pid = kernel_thread (ocfs_commit_thread, osb,
+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ if (child_pid < 0) {
+ LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
+ child_pid);
+ up (&(osb->osb_res));
+ status = child_pid;
+ goto leave;
+ } else {
+ init_completion (&osb->commit->c_complete);
+ }
+ up (&(osb->osb_res));
leave:
- if (unlock_super)
- ocfs2_super_unlock(osb, 1);
-
if (bh != NULL)
brelse(bh);
LOG_EXIT_STATUS (status);
@@ -1021,67 +1074,232 @@
* ocfs_dismount_volume()
*
*/
-static void ocfs_dismount_volume (struct super_block *sb)
+static int ocfs_dismount_volume (struct super_block *sb)
{
- int tmp;
+ int status;
+ int AcquiredOSB = 0;
ocfs_super *osb = NULL;
+ int i;
LOG_ENTRY_ARGS ("(0x%p)\n", sb);
- OCFS_ASSERT(sb);
+ if (sb == NULL) {
+ LOG_ERROR_STATUS (status = -EINVAL);
+ goto leave;
+ }
+
osb = OCFS_SB(sb);
- OCFS_ASSERT(osb);
- ocfs_shutdown_local_alloc(osb);
+ if (osb == NULL) {
+ LOG_ERROR_STATUS (status = -EINVAL);
+ goto leave;
+ }
/* disable any new recovery threads and wait for any currently
* running ones to exit. Do this before setting the vol_state. */
down(&osb->recovery_lock);
osb->disable_recovery = 1;
- while (osb->recovery_launched) {
- up(&osb->recovery_lock);
+ up(&osb->recovery_lock);
+ while (atomic_read(&osb->num_recovery_threads)) {
LOG_TRACE_STR("Waiting on a recovery thread to complete.");
schedule();
- down(&osb->recovery_lock);
}
- up(&osb->recovery_lock);
+ down(&(osb->osb_res));
+ AcquiredOSB = 1;
+
+ ocfs_shutdown_local_alloc(osb);
ocfs_journal_shutdown(osb);
+ /* unset the mounted flag -- we're done with the journal and
+ * the local alloc bitmap */
+ status = ocfs_publish_set_unmounted(osb, osb->node_num);
+ if (status < 0)
+ LOG_ERROR_STR("Could not set mounted flag!");
+
ocfs_sync_blockdev(sb);
/* Remove the proc element for this volume */
ocfs_proc_remove_volume (osb);
- tmp = ocfs2_super_lock(osb, 1);
- if (tmp < 0) {
- LOG_ERROR_STATUS(tmp);
- return;
+ /* Dismount */
+ OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_BEING_DISMOUNTED);
+
+ /* Wait for this volume's NM thread to exit */
+ if (osb->dlm_task) {
+ LOG_TRACE_STR ("Waiting for ocfs2nm to exit....");
+ send_sig (SIGINT, osb->dlm_task, 0);
+ wait_for_completion (&(osb->dlm_complete));
+ osb->dlm_task = NULL;
}
- tmp = ocfs2_request_umount_vote(osb);
- if (tmp < 0)
- LOG_ERROR_STATUS(tmp);
+ /* send dismount msg to all */
+ status = ocfs_send_dismount_msg (osb);
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
- ocfs2_put_slot(osb);
+ /* decrement mount count */
+ spin_lock (&mount_cnt_lock);
+ mount_cnt--;
+ if (mount_cnt == 0) {
+ /* Shutdown ocfslsnr */
+ if (OcfsIpcCtxt.task) {
+ LOG_TRACE_STR ("Waiting for ocfs2lsnr to exit....");
+ send_sig (SIGINT, OcfsIpcCtxt.task, 0);
+ wait_for_completion (&(OcfsIpcCtxt.complete));
+ OcfsIpcCtxt.task = NULL;
+ }
+ }
+ spin_unlock (&mount_cnt_lock);
- ocfs2_dlm_shutdown(osb);
-
- ocfs2_clear_hb_callbacks(osb);
-
atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
+ if (AcquiredOSB) {
+ up (&(osb->osb_res));
+ AcquiredOSB = 0;
+ }
printk ("ocfs2: Unmounting device (%u,%u) on %s (node %d)\n",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev),
- OcfsGlobalCtxt.node_name, osb->node_num);
+ osb->node_cfg_info[osb->node_num]->node_name, osb->node_num);
+ /* Free all nodecfgs */
+ for (i = 0; i < osb->max_nodes; ++i) {
+ BARF_BARF_BARF *p;
+
+ p = osb->node_cfg_info[i];
+ if (p)
+ kfree(p);
+ }
+
ocfs_release_system_inodes(osb);
ocfs_delete_osb (osb);
kfree(osb);
sb->s_dev = 0;
+
+leave:
+ if (AcquiredOSB) {
+ up (&(osb->osb_res));
+ AcquiredOSB = 0;
+ }
+
+ LOG_EXIT_STATUS (status);
+ return status;
} /* ocfs_dismount_volume */
+
+/* Returns node_num's 'mounted' flag as read from its publish sector (nonzero if mounted); returns 0 if the sector cannot be read. */
+int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num)
+{
+ int status;
+ ocfs_publish *publish;
+ struct buffer_head *publish_bh = NULL;
+ int retval = 0;
+ int flags = 0;
+
+ LOG_ENTRY();
+
+ /* read it in */
+ /* we may be called during mount in which case our publish
+ * sector might be dirty. */
+ if (node_num == osb->node_num)
+ flags = OCFS_BH_CACHED;
+ status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
+ &publish_bh, flags, NULL);
+ if (status < 0) {
+ brelse(publish_bh);
+ LOG_ERROR_STR("Could not read publish sector, mounted value"
+ " may be incorrect!");
+ LOG_ERROR_STATUS (status);
+ goto done;
+ }
+ publish = (ocfs_publish *) publish_bh->b_data;
+
+ retval = publish->mounted;
+
+ brelse(publish_bh);
+done:
+ LOG_EXIT_STATUS(retval);
+ return(retval);
+}
+
+static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num, int value)
+{
+ int status;
+ ocfs_publish *publish;
+ struct buffer_head * publish_bh = NULL;
+
+ LOG_ENTRY_ARGS("(node_num=%d, value=%d)\n", node_num, value);
+
+ /* read it in */
+ status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
+ &publish_bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto done;
+ }
+ publish = (ocfs_publish *) publish_bh->b_data;
+
+ /* change it */
+ publish->mounted = value;
+
+ /* write it back out */
+ status = ocfs_write_block(osb, publish_bh, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto done;
+ }
+
+done:
+ if (publish_bh)
+ brelse(publish_bh);
+
+ LOG_EXIT_STATUS(status);
+
+ return(status);
+}
+
+/* set the 'mounted' bit in the publish sector */
+static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num)
+{
+ int retval;
+
+ LOG_ENTRY_ARGS("(node_num=%d)\n", node_num);
+
+ down (&(osb->publish_lock));
+
+ retval = ocfs_publish_toggle_mounted(osb, node_num, 1);
+ if (osb->node_num == node_num)
+ osb->check_mounted = 1;
+
+ up (&(osb->publish_lock));
+
+ LOG_EXIT_STATUS(retval);
+
+ return(retval);
+}
+
+/* unset the 'mounted' bit in the publish sector */
+static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num)
+{
+ int retval;
+
+ LOG_ENTRY_ARGS("(node_num=%d)\n", node_num);
+
+ down (&(osb->publish_lock));
+
+ if (osb->node_num == node_num)
+ osb->check_mounted = 0;
+
+ retval = ocfs_publish_toggle_mounted(osb, node_num, 0);
+
+ up (&(osb->publish_lock));
+
+ LOG_EXIT_STATUS(retval);
+
+ return(retval);
+}
+
/*
* ocfs_initialize_osb()
*
@@ -1089,10 +1307,14 @@
static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh)
{
int status = 0;
+ ocfs_publish *publish = NULL;
+ u64 p_blkno;
+ struct buffer_head *publish_bh = NULL; /* our own publish sector */
+ struct buffer_head **publish_bhs = NULL; /* all the publish sectors */
+ struct buffer_head *bitmap_bh = NULL;
int i;
ocfs2_dinode *di = NULL;
struct inode *inode = NULL;
- struct buffer_head *bitmap_bh = NULL;
LOG_ENTRY ();
@@ -1101,53 +1323,47 @@
if (!osb->vol_label) {
LOG_ERROR_STR("unable to alloc vol label");
status = -ENOMEM;
- goto bail;
+ goto done_nojournal;
}
osb->uuid = kmalloc(MAX_VOL_ID_LENGTH, GFP_KERNEL);
if (!osb->uuid) {
LOG_ERROR_STR("unable to alloc uuid");
status = -ENOMEM;
- goto bail;
+ goto done_nojournal;
}
+
/* this needs to be done before most other initializations */
di = (ocfs2_dinode *) bh->b_data;
- osb->max_nodes = le32_to_cpu(di->id2.i_super.s_max_nodes);
+ osb->max_nodes = le16_to_cpu(di->id2.i_super.bryce_s_max_nodes);
if (osb->max_nodes > OCFS2_MAX_NODES) {
LOG_ERROR_ARGS("Invalid number of nodes (%u)\n",
osb->max_nodes);
status = -EINVAL;
- goto bail;
+ goto done_nojournal;
}
printk("max_nodes for this device: %u\n", osb->max_nodes);
osb->s_feature_compat =
- le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
+ le32_to_cpu(OCFS2_RAW_SB(di)->bryce_s_feature_compat);
osb->s_feature_ro_compat =
- le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat);
+ le32_to_cpu(OCFS2_RAW_SB(di)->bryce_s_feature_ro_compat);
osb->s_feature_incompat =
- le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat);
+ le32_to_cpu(OCFS2_RAW_SB(di)->bryce_s_feature_incompat);
if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) {
LOG_ERROR_ARGS("couldn't mount because of unsupported "
"optional features (%x).\n", i);
- goto bail;
+ goto done_nojournal;
}
if (!(osb->sb->s_flags & MS_RDONLY) &&
(i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
LOG_ERROR_ARGS("couldn't mount RDWR because of "
"unsupported optional features (%x).\n",
i);
- goto bail;
+ goto done_nojournal;
}
- init_waitqueue_head(&osb->recovery_event);
- atomic_set(&osb->wake_vote_task, 0);
- spin_lock_init(&osb->vote_task_lock);
- init_waitqueue_head(&osb->vote_event);
- INIT_LIST_HEAD(&osb->blocked_lock_list);
- osb->blocked_lock_count = 0;
- INIT_LIST_HEAD(&osb->vote_list);
spin_lock_init(&osb->s_next_gen_lock);
get_random_bytes(&osb->s_next_generation, sizeof(u32));
@@ -1164,42 +1380,116 @@
if (!osb->journal) {
LOG_ERROR_STR("unable to alloc journal");
status = -ENOMEM;
- goto bail;
+ goto done_nojournal;
}
memset(osb->journal, 0, sizeof(ocfs_journal));
- ocfs2_init_node_maps(osb);
+ publish_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
+ if (publish_bhs == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto finally;
+ }
+ memset(publish_bhs, 0, sizeof(struct buffer_head *) * osb->max_nodes);
+ osb->vol_node_map = kmalloc(sizeof(ocfs_vol_node_map) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->vol_node_map) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->vol_node_map, 0, sizeof(ocfs_vol_node_map) * osb->max_nodes);
+
+ osb->lock_recovery_lists = kmalloc(sizeof(struct list_head) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->lock_recovery_lists) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->lock_recovery_lists, 0, sizeof(struct list_head) * osb->max_nodes);
+
+ osb->last_publ_seq_num = kmalloc(sizeof(__u64) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->last_publ_seq_num) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->last_publ_seq_num, 0, sizeof(__u64) * osb->max_nodes);
+
+ osb->node_cfg_info = kmalloc(sizeof(BARF_BARF_BARF *) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->node_cfg_info) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->node_cfg_info, 0, sizeof(BARF_BARF_BARF *) * osb->max_nodes);
+
+ ocfs_node_map_init(osb, &osb->publ_map);
+
+
+ OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN);
+
INIT_LIST_HEAD (&(osb->osb_next));
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
+ init_MUTEX (&(osb->osb_res));
init_MUTEX (&(osb->recovery_lock));
+ init_MUTEX (&(osb->orphan_recovery_lock));
+ init_MUTEX (&(osb->comm_lock));
+ init_MUTEX (&(osb->cfg_lock));
+ init_MUTEX (&(osb->vote_sem));
+ spin_lock_init(&osb->recovery_map_lock);
+ ocfs_node_map_init(osb, &osb->recovery_map);
+
+ osb->needs_flush = 0;
osb->disable_recovery = 0;
- osb->recovery_launched = 0;
- init_waitqueue_head (&osb->checkpoint_event);
- atomic_set (&osb->needs_checkpoint, 0);
+ init_MUTEX (&(osb->publish_lock));
+ atomic_set (&osb->node_req_vote, 0);
+
+ atomic_set (&osb->num_recovery_threads, 0);
+
+ init_waitqueue_head (&osb->nm_init_event);
+ atomic_set (&osb->nm_init, 0);
+
+ osb->publish_dirty = 0;
+ init_waitqueue_head (&osb->flush_event);
+ atomic_set (&osb->flush_event_woken, 0);
atomic_set (&osb->clean_buffer_seq, 1);
spin_lock_init (&osb->clean_buffer_lock);
+ spin_lock_init (&osb->vote_obj_queue_lock);
+ INIT_LIST_HEAD (&(osb->vote_obj_queue));
+ for (i=0; i<osb->max_nodes; i++) {
+ INIT_LIST_HEAD(&(osb->lock_recovery_lists[i]));
+ }
osb->node_num = OCFS_INVALID_NODE_NUM;
- osb->slot_num = OCFS_INVALID_NODE_NUM;
osb->have_local_alloc = 0;
osb->local_alloc_bh = NULL;
+ init_waitqueue_head (&osb->open_event);
+ /* not using any of these sb fields yet */
+#if 0
+di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)
+di->i_mtime = cpu_to_le64(format_time); // use this as s_mtime (mount time)
+di->i_blkno = cpu_to_le64(super_off >> blocksize_bits);
+di->id2.i_super.s_mnt_count = 0;
+di->id2.i_super.s_max_mnt_count = cpu_to_le16(OCFS2_DFL_MAX_MNT_COUNT);
+di->id2.i_super.s_state = 0;
+di->id2.i_super.s_errors = 0;
+di->id2.i_super.s_lastcheck = cpu_to_le64(format_time);
+di->id2.i_super.s_checkinterval = cpu_to_le32(OCFS2_DFL_CHECKINTERVAL);
+di->id2.i_super.s_creator_os = cpu_to_le32(OCFS2_OS_LINUX);
+#endif
+
di = (ocfs2_dinode *) bh->b_data;
/* get some pseudo constants for clustersize bits */
- osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.bryce_s_clustersize_bits);
osb->s_clustersize = 1 << osb->s_clustersize_bits;
printk("clusterbits=%d\n", osb->s_clustersize_bits);
OCFS_ASSERT(osb->s_clustersize_bits);
- if (ocfs2_clusters_to_blocks(osb->sb, di->i_clusters - 1)
+ if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->bryce_i_clusters) - 1)
> (u32)~0UL) {
LOG_ERROR_ARGS("Volume might try to write to blocks beyond "
"what jbd can address in 32 bits.\n");
@@ -1210,10 +1500,9 @@
strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
osb->vol_label[63] = '\0';
memcpy(osb->uuid, di->id2.i_super.s_uuid, MAX_VOL_ID_LENGTH);
- osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
- osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
- osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.s_first_cluster_group);
- osb->fs_generation = le32_to_cpu(di->i_fs_generation);
+ osb->root_blkno = le64_to_cpu(di->id2.i_super.bryce_s_root_blkno);
+ osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.bryce_s_system_dir_blkno);
+ osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.bryce_s_first_cluster_group);
printk("vol_label: %s\n", osb->vol_label);
{
int ttt;
@@ -1234,24 +1523,72 @@
}
/*
- * an inode for the heartbeat area
+ * an inode for the whole DLM area
+ * autoconfig, new autoconfig, publish, vote
*/
- inode = ocfs_get_system_file_inode(osb, HEARTBEAT_SYSTEM_INODE, -1);
+ inode = ocfs_get_system_file_inode(osb, DLM_SYSTEM_INODE, -1);
if (!inode) {
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- if (inode->i_size >> osb->sb->s_blocksize_bits < OCFS2_MAX_NODES) {
- LOG_ERROR_ARGS("heartbeat area size incorrect: "
+ status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &p_blkno,
+ NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ // i_size, converted to blocks, must be at least
+ // (2 + osb->max_nodes + 4) + osb->max_nodes + osb->max_nodes
+ if (inode->i_size >> osb->sb->s_blocksize_bits <
+ (OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes) + // autoconfig
+ OCFS_VOLCFG_NEWCFG_SECTORS + // new autoconfig
+ osb->max_nodes + // publish
+ osb->max_nodes ) { // vote
+ LOG_ERROR_ARGS("dlm area size incorrect: "
"found=%llu, need=%u\n",
inode->i_size,
- OCFS2_MAX_NODES << osb->sb->s_blocksize_bits);
+ (OCFS_VOLCFG_HDR_SECTORS +
+ OCFS_VOLCFG_NEWCFG_SECTORS +
+ (osb->max_nodes*3)) <<
+ osb->sb->s_blocksize_bits);
status = -EINVAL;
goto bail;
}
+ osb->autoconfig_blkno = p_blkno;
+ osb->autoconfig_blocks = OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes;
+
+ osb->new_autoconfig_blkno = osb->autoconfig_blkno + osb->autoconfig_blocks;
+ osb->new_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS;
+ osb->total_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS + osb->max_nodes;
+
+ osb->publish_blkno = osb->new_autoconfig_blkno + osb->new_autoconfig_blocks;
+ osb->publish_blocks = osb->max_nodes;
+
+ osb->vote_blkno = osb->publish_blkno + osb->publish_blocks;
+ osb->vote_blocks = osb->max_nodes;
+
+ printk("autoconfig: blkno=%llu, blocks=%u newblkno=%llu newblocks=%u\n",
+ osb->autoconfig_blkno, osb->autoconfig_blocks,
+ osb->new_autoconfig_blkno, osb->new_autoconfig_blocks);
+ printk("publish: blkno=%llu, blocks=%u\n", osb->publish_blkno,
+ osb->publish_blocks);
+ printk("vote: blkno=%llu, blocks=%u\n", osb->vote_blkno, osb->vote_blocks);
+
+ osb->autoconfig_bhs = ocfs_malloc (osb->total_autoconfig_blocks
+ * sizeof(struct buffer_head *));
+ if (!osb->autoconfig_bhs) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->autoconfig_bhs, 0,
+ osb->total_autoconfig_blocks * sizeof(struct buffer_head *));
+
iput(inode);
+
+
/*
* global bitmap
@@ -1261,7 +1598,7 @@
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
-
+
osb->bitmap_blkno = OCFS_I(inode)->ip_blkno;
status = ocfs_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, inode);
@@ -1270,26 +1607,82 @@
LOG_ERROR_STATUS(status);
goto bail;
}
-
di = (ocfs2_dinode *) bitmap_bh->b_data;
- osb->bitmap_cpg = di->id2.i_chain.cl_cpg;
- osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
+ osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.bryce_cl_cpg);
+ osb->num_clusters = le32_to_cpu(di->id1.bitmap1.bryce_i_total);
brelse(bitmap_bh);
printk("cluster bitmap inode: %llu, clusters per group: %u\n",
osb->bitmap_blkno, osb->bitmap_cpg);
- status = ocfs2_init_slot_info(osb);
+ osb->prealloc_lock = 0;
+
+
+ status = ocfs_get_config (osb);
if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+
+ /* Read the Publish Sector of local Node */
+ status = ocfs_read_block(osb, (osb->publish_blkno + osb->node_num),
+ &publish_bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+ publish = (ocfs_publish *) publish_bh->b_data;
+
+ /*
+ * FIXME: This really ought to be something exported by the
+ * identical code in heartbeat.c
+ */
+ publish->time = jiffies;
+ /* Disallow 0 */
+ if (!publish->time)
+ publish->time = 1;
+
+ publish = NULL;
+
+ status = ocfs_write_block(osb, publish_bh, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+ brelse(publish_bh);
+
+ /* Read disk for all Publish Sectors */
+ status = ocfs_read_blocks(osb, osb->publish_blkno, osb->max_nodes,
+ publish_bhs, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+
+ ocfs_update_publish_map(osb, publish_bhs, 1);
+
+ for(i = 0; i < osb->max_nodes; i++)
+ osb->last_publ_seq_num[i] = (__u64) (-1);
+
+
+ /* load all node-local system inodes */
+ status = ocfs_init_local_system_inodes(osb);
+ if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ /* We might need to add a variable in Global List of osb to */
+ /* delay any creation, if any other node is already creating a file */
+
/* Link this osb onto the global linked list of all osb structures. */
/* The Global Link List is maintained for the whole driver. */
down (&(OcfsGlobalCtxt.global_res));
list_add_tail (&(osb->osb_next), &(OcfsGlobalCtxt.osb_next));
up (&(OcfsGlobalCtxt.global_res));
+ /* Mark the fact that this osb structure is initialized. */
+ OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_INITIALIZED);
+
spin_lock (&osb_id_lock);
osb->osb_id = osb_id;
if (osb_id < OCFS_MAX_OSB_ID)
@@ -1302,7 +1695,35 @@
}
spin_unlock (&osb_id_lock);
+
+ /* skip the frees which happen on error only */
+ goto finally;
+
bail:
+ if (osb->autoconfig_bhs)
+ kfree(osb->autoconfig_bhs);
+ if (osb->vol_node_map)
+ kfree(osb->vol_node_map);
+ if (osb->lock_recovery_lists)
+ kfree(osb->lock_recovery_lists);
+ if (osb->last_publ_seq_num)
+ kfree(osb->last_publ_seq_num);
+ if (osb->node_cfg_info)
+ kfree(osb->node_cfg_info);
+finally:
+ if (publish) {
+ if (publish_bh) {
+ brelse(publish_bh);
+ }
+ }
+ if (publish_bhs[0]) {
+ int i;
+ for(i = 0; i < osb->max_nodes; i++)
+ if (publish_bhs[i])
+ brelse(publish_bhs[i]);
+ }
+
+done_nojournal:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_initialize_osb */
@@ -1314,7 +1735,7 @@
* 0 on success
*/
static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
- u32 blksz)
+ __u32 blksz)
{
int status = -EAGAIN;
@@ -1323,38 +1744,39 @@
if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
status = -EINVAL;
- if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
+ if ((1 << le32_to_cpu(di->id2.i_super.bryce_s_blocksize_bits)) != blksz) {
LOG_ERROR_ARGS("found superblock with incorrect block size: "
"found %u, should be %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
+ 1 << le32_to_cpu(di->id2.i_super.bryce_s_blocksize_bits),
blksz);
- } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
+ } else if (le16_to_cpu(di->id2.i_super.bryce_s_major_rev_level) !=
OCFS2_MAJOR_REV_LEVEL ||
- le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
+ le16_to_cpu(di->id2.i_super.bryce_s_minor_rev_level) !=
OCFS2_MINOR_REV_LEVEL) {
+#warning dont know what is appropriate on minor rev difference
LOG_ERROR_ARGS("found superblock with bad version: "
"found %u.%u, should be %u.%u\n",
- le16_to_cpu(di->id2.i_super.s_major_rev_level),
- le16_to_cpu(di->id2.i_super.s_minor_rev_level),
+ le16_to_cpu(di->id2.i_super.bryce_s_major_rev_level),
+ le16_to_cpu(di->id2.i_super.bryce_s_minor_rev_level),
OCFS2_MAJOR_REV_LEVEL,
OCFS2_MINOR_REV_LEVEL);
- } else if (bh->b_blocknr != di->i_blkno) {
+ } else if (bh->b_blocknr != le64_to_cpu(di->bryce_i_blkno)) {
LOG_ERROR_ARGS("bad block number on superblock: "
"found %llu, should be %llu\n",
- di->i_blkno,
+ le64_to_cpu(di->bryce_i_blkno),
(unsigned long long)bh->b_blocknr);
- } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
- le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
+ } else if (le32_to_cpu(di->id2.i_super.bryce_s_clustersize_bits) < 12 ||
+ le32_to_cpu(di->id2.i_super.bryce_s_clustersize_bits) > 20) {
LOG_ERROR_ARGS("bad cluster size found: %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
- } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
+ 1 << le32_to_cpu(di->id2.i_super.bryce_s_clustersize_bits));
+ } else if (!le64_to_cpu(di->id2.i_super.bryce_s_root_blkno)) {
LOG_ERROR_STR("bad root_blkno: 0");
- } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
+ } else if (!le64_to_cpu(di->id2.i_super.bryce_s_system_dir_blkno)) {
LOG_ERROR_STR("bad system_dir_blkno: 0");
- } else if (le32_to_cpu(di->id2.i_super.s_max_nodes) > OCFS2_MAX_NODES) {
+ } else if (le16_to_cpu(di->id2.i_super.bryce_s_max_nodes) > OCFS2_MAX_NODES) {
LOG_ERROR_ARGS("max_nodes found greater than maximum: "
"found %u, max %u\n",
- le32_to_cpu(di->id2.i_super.s_max_nodes),
+ le16_to_cpu(di->id2.i_super.bryce_s_max_nodes),
OCFS2_MAX_NODES);
} else {
/* found it! */
@@ -1366,35 +1788,6 @@
return status;
} /* ocfs_verify_volume */
-/* This part of local node recovery needs to happen after we've
- * discovered all other nodes that need recovery and we've recovered
- * them. */
-static int ocfs2_complete_mount_recovery(ocfs_super *osb)
-{
- int status = 0;
- ocfs2_dinode *local_alloc = osb->local_alloc_copy;
-
- osb->local_alloc_copy = NULL;
-
- if (osb->dirty) {
- status = ocfs_complete_local_alloc_recovery(osb, local_alloc);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
-
- status = ocfs_recover_orphans(osb);
- if (status < 0)
- LOG_ERROR_STATUS(status);
- }
- osb->dirty = 0;
-
-finally:
- if (local_alloc)
- kfree(local_alloc);
- return status;
-}
-
/*
* ocfs_check_volume()
*
@@ -1402,15 +1795,32 @@
static int ocfs_check_volume (ocfs_super * osb)
{
int status = 0;
- int dirty;
+ ocfs_publish *publish = NULL;
+ int node_num = osb->node_num;
+ struct buffer_head * publish_bh = NULL;
+ int mounted;
ocfs2_dinode *local_alloc = NULL; /* only used if we
* recover
* ourselves. */
LOG_ENTRY ();
+ /* Read the node's publish sector */
+ status = ocfs_read_block(osb, (osb->publish_blkno + osb->node_num),
+ &publish_bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+
+ publish = (ocfs_publish *) publish_bh->b_data;
+ /* we copy this out of the publish sector and then unlock
+ * the bh as other functions will need to modify it. */
+ mounted = publish->mounted;
+ publish = NULL;
+
/* Init our journal object. */
- status = ocfs_journal_init(osb, &dirty);
+ status = ocfs_journal_init(osb);
if (status < 0) {
LOG_ERROR_STR("Could not initialize journal!");
goto finally;
@@ -1419,7 +1829,7 @@
/* If the journal was unmounted cleanly then we don't want to
* recover anything. Otherwise, journal_load will do that
* dirty work for us :) */
- if (!dirty) {
+ if (!mounted) {
status = ocfs_journal_wipe(osb->journal, 0);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1433,10 +1843,10 @@
/* will play back anything left in the journal. */
ocfs_journal_load(osb->journal);
- if (dirty) {
+ if (mounted) {
/* recover my local alloc if we didn't unmount cleanly. */
status = ocfs_begin_local_alloc_recovery(osb,
- osb->slot_num,
+ node_num,
&local_alloc);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1446,33 +1856,36 @@
* ourselves as mounted. */
}
+ /* 'mounted' flag in publish sector should not be set until
+ * after we successfully load the journal. */
+ status = ocfs_publish_set_mounted(osb, osb->node_num);
+ if (status < 0)
+ LOG_ERROR_STR("Could not set mounted flag!");
LOG_TRACE_STR("Journal loaded.");
status = ocfs_load_local_alloc(osb);
- if (status < 0) {
+ if (status < 0)
LOG_ERROR_STATUS(status);
- goto finally;
- }
- if (dirty) {
- /* Recovery will be completed after we've mounted the
- * rest of the volume. */
- osb->dirty = 1;
- osb->local_alloc_copy = local_alloc;
- local_alloc = NULL;
+ if (mounted) {
+ status = ocfs_complete_local_alloc_recovery(osb, local_alloc);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto finally;
+ }
+
+ status = ocfs_recover_orphans(osb);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
}
- /* go through each journal, trylock it and if you get the
- * lock, and it's marked as dirty, set the bit in the recover
- * map and launch a recovery thread for it. */
- status = ocfs2_mark_dead_nodes(osb);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
finally:
if (local_alloc)
kfree(local_alloc);
+ if (publish_bh)
+ brelse(publish_bh);
+
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_check_volume */
@@ -1488,6 +1901,7 @@
*/
static void ocfs_delete_osb (ocfs_super * osb)
{
+ int i;
LOG_ENTRY ();
/* This function assumes that the caller has the main osb resource */
@@ -1499,25 +1913,35 @@
list_del (&(osb->osb_next));
up (&(OcfsGlobalCtxt.global_res));
- if (osb->slot_info)
- ocfs2_free_slot_info(osb->slot_info);
+ for (i=0; i<osb->max_nodes; i++)
+ ocfs_recover_oin_locks(osb, i);
- if (osb->group_inode)
- iput(osb->group_inode);
+ for(i = 0; i < osb->total_autoconfig_blocks; i++)
+ if (osb->autoconfig_bhs[i])
+ brelse(osb->autoconfig_bhs[i]);
+ if (osb->autoconfig_bhs)
+ kfree(osb->autoconfig_bhs);
+ if (osb->vol_node_map)
+ kfree(osb->vol_node_map);
+ if (osb->lock_recovery_lists)
+ kfree(osb->lock_recovery_lists);
+ if (osb->last_publ_seq_num)
+ kfree(osb->last_publ_seq_num);
+ if (osb->node_cfg_info)
+ kfree(osb->node_cfg_info);
+
/* FIXME
* This belongs in journal shutdown, but because we have to
* allocate osb->journal at the start of ocfs_initalize_osb(),
* we free it here.
*/
kfree(osb->journal);
- if (osb->group_name)
- kfree(osb->group_name);
- if (osb->local_alloc_copy)
- kfree(osb->local_alloc_copy);
+
memset (osb, 0, sizeof (ocfs_super));
LOG_EXIT ();
+ return;
} /* ocfs_delete_osb */
Modified: branches/endian/src/symlink.c
===================================================================
--- branches/endian/src/symlink.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/symlink.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -42,16 +42,12 @@
#include "ocfs_log.h"
#include "ocfs.h"
-#include "ocfs2.h"
#include "alloc.h"
#include "file.h"
#include "inode.h"
#include "symlink.h"
-#include "ocfs_journal.h"
-#include "buffer_head_io.h"
-
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_SYMLINK
struct ocfs_symlink_ops {
@@ -62,11 +58,6 @@
static unsigned int ocfs_link_size(struct ocfs_symlink_ops *ops, char *str, struct inode *inode);
static void ocfs_link_expand(struct ocfs_symlink_ops *ops, char *out, char *in, struct inode *inode);
-static char *ocfs2_fast_symlink_getlink(struct inode *inode,
- struct buffer_head **bh);
-static int ocfs2_readlink(struct dentry *dentry,
- char *buffer,
- int buflen);
/* Context Dependent Symbolic Link (CDSL) code */
@@ -189,54 +180,17 @@
return (char*)page;
}
-static char *ocfs2_fast_symlink_getlink(struct inode *inode,
- struct buffer_head **bh)
-{
- int status;
- char *link = NULL;
- ocfs2_dinode *fe;
- LOG_ENTRY();
-
- status = ocfs_read_block(OCFS2_SB(inode->i_sb),
- OCFS_I(inode)->ip_blkno,
- bh,
- OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- link = ERR_PTR(status);
- goto bail;
- }
-
- fe = (ocfs2_dinode *) (*bh)->b_data;
- link = (char *) fe->id2.i_symlink;
-bail:
- LOG_EXIT_STATUS(status);
-
- return link;
-}
-
static int ocfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct page *page = NULL;
- struct buffer_head *bh = NULL;
char *orig_link, *new_link;
unsigned int len, res = 0;
LOG_SET_CONTEXT(FOLLOW_LINK);
- if (ocfs2_inode_is_fast_symlink(inode))
- orig_link = ocfs2_fast_symlink_getlink(inode, &bh);
- else
- orig_link = my_page_getlink(dentry, &page);
-
- if (IS_ERR(orig_link)) {
- res = PTR_ERR(orig_link);
- goto out;
- }
-
+ orig_link = my_page_getlink(dentry, &page);
len = ocfs_link_size(symlink_ops, orig_link, inode);
new_link = kmalloc(len, GFP_KERNEL);
if (new_link == NULL) {
@@ -250,8 +204,6 @@
kunmap(page);
page_cache_release(page);
}
- if (bh)
- brelse(bh);
LOG_CLEAR_CONTEXT();
return res;
@@ -322,51 +274,16 @@
return len + 1;
}
-static int ocfs2_readlink(struct dentry *dentry,
- char *buffer,
- int buflen)
-{
- int ret;
- char *link;
- struct buffer_head *bh = NULL;
- struct inode *inode = dentry->d_inode;
-
- LOG_ENTRY();
-
- link = ocfs2_fast_symlink_getlink(inode, &bh);
- if (IS_ERR(link)) {
- ret = PTR_ERR(link);
- goto out;
- }
-
- ret = vfs_readlink(dentry, buffer, buflen, link);
-
- brelse(bh);
-out:
- LOG_EXIT_STATUS(ret);
- return ret;
-}
-
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
struct inode_operations ocfs_symlink_inode_operations = {
.readlink = page_readlink,
.follow_link = ocfs_follow_link,
.getattr = ocfs_getattr,
};
-struct inode_operations ocfs_fast_symlink_inode_operations = {
- .readlink = ocfs2_readlink,
- .follow_link = ocfs_follow_link,
- .getattr = ocfs_getattr,
-};
#else
struct inode_operations ocfs_symlink_inode_operations = {
.readlink = page_readlink,
.follow_link = ocfs_follow_link,
.revalidate = ocfs_inode_revalidate,
};
-struct inode_operations ocfs_fast_symlink_inode_operations = {
- .readlink = ocfs2_readlink,
- .follow_link = ocfs_follow_link,
- .revalidate = ocfs_inode_revalidate,
-};
#endif
Modified: branches/endian/src/symlink.h
===================================================================
--- branches/endian/src/symlink.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/symlink.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -27,16 +27,5 @@
#define OCFS2_SYMLINK_H
extern struct inode_operations ocfs_symlink_inode_operations;
-extern struct inode_operations ocfs_fast_symlink_inode_operations;
-/*
- * Test whether an inode is a fast symlink.
- */
-static inline int ocfs2_inode_is_fast_symlink(struct inode *inode)
-{
- return (S_ISLNK(inode->i_mode) &&
- inode->i_blocks == 0);
-}
-
-
#endif /* OCFS2_SYMLINK_H */
Modified: branches/endian/src/sysfile.c
===================================================================
--- branches/endian/src/sysfile.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/sysfile.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -46,30 +46,30 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_SYSFILE
-static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, u32 slot);
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
static inline int is_global_system_inode(int type);
-static inline int is_in_system_inode_array(ocfs_super *osb, int type, u32 slot);
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node);
static inline int is_global_system_inode(int type)
{
- return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
- type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE;
+ return (type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
+ type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE);
}
-static inline int is_in_system_inode_array(ocfs_super *osb, int type, u32 slot)
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node)
{
- return slot == osb->slot_num || is_global_system_inode(type);
+ return (node == osb->node_num || is_global_system_inode(type));
}
struct inode *ocfs_get_system_file_inode(ocfs_super *osb, int type,
- u32 slot)
+ __u32 node)
{
struct inode *inode = NULL;
struct inode **arr = NULL;
/* avoid the lookup if cached in local system file array */
- if (is_in_system_inode_array(osb, type, slot))
+ if (is_in_system_inode_array(osb, type, node))
arr = &(osb->system_inodes[type]);
if (arr && ((inode = *arr) != NULL)) {
@@ -82,7 +82,7 @@
}
/* this gets one ref thru iget */
- inode = _ocfs_get_system_file_inode(osb, type, slot);
+ inode = _ocfs_get_system_file_inode(osb, type, node);
/* add one more if putting into array for first time */
if (arr && inode) {
@@ -93,7 +93,7 @@
return inode;
}
-static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, u32 slot)
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node)
{
char namebuf[40];
struct inode *inode = NULL;
@@ -104,7 +104,7 @@
ocfs2_sprintf_system_inode_name(namebuf,
sizeof(namebuf),
- type, slot);
+ type, node);
status = ocfs_find_files_on_disk(osb, namebuf, strlen(namebuf),
&blkno, osb->sys_root_inode,
Modified: branches/endian/src/sysfile.h
===================================================================
--- branches/endian/src/sysfile.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/sysfile.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -26,6 +26,6 @@
#ifndef OCFS2_SYSFILE_H
#define OCFS2_SYSFILE_H
-struct inode * ocfs_get_system_file_inode(ocfs_super *osb, int type, u32 slot);
+struct inode * ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
#endif /* OCFS2_SYSFILE_H */
Modified: branches/endian/src/util.c
===================================================================
--- branches/endian/src/util.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/util.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -38,6 +38,8 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_UTIL
+static void ocfs_timeout_func(unsigned long data);
+
/* block all but 'mask' sigs, optionally saving off our previous
* signal state. */
void ocfs_block_sigs(sigset_t *oldsigs, unsigned long mask)
@@ -103,6 +105,8 @@
sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
}
+
+ return;
} /* ocfs_daemonize */
#else
void ocfs_daemonize (char *name, int len, int shutdown_sigs)
@@ -121,9 +125,35 @@
ocfs_block_sigs(NULL, SHUTDOWN_SIGS);
else
ocfs_block_sigs(NULL, 0);
+ return;
} /* ocfs_daemonize */
#endif
+/*
+ * ocfs_sleep()
+ *
+ * The interval time is in milliseconds
+ *
+ * This function needs to be removed.
+ * Instead call schedule_timeout() directly and handle signals.
+ */
+int ocfs_sleep (__u32 ms)
+{
+ __u32 numJiffies;
+
+ LOG_ENTRY ();
+
+ /* convert ms to jiffies (10ms = 1 jiffy only when HZ is 100); minimum resolution is one jiffy */
+ numJiffies = ms * HZ / 1000;
+ numJiffies = (numJiffies < 1) ? 1 : numJiffies;
+
+ set_current_state (TASK_INTERRUPTIBLE);
+ numJiffies = schedule_timeout (numJiffies);
+
+ LOG_EXIT ();
+ return 0;
+} /* ocfs_sleep */
+
/* prefetch has been declared to allow to build in debug mode */
#ifdef DEBUG
#ifndef ARCH_HAS_PREFETCH
@@ -133,6 +163,46 @@
#endif
#endif
+
+static void ocfs_timeout_func(unsigned long data)
+{
+ ocfs_timeout *to = (ocfs_timeout *)data;
+
+ to->timed_out = 1;
+ wake_up(&to->wait);
+}
+
+void ocfs_init_timeout(ocfs_timeout *to)
+{
+ init_timer(&to->timer);
+ to->timer.data = (unsigned long)to;
+ to->timer.function = ocfs_timeout_func;
+ to->timed_out = 0;
+ init_waitqueue_head(&to->wait);
+}
+
+void ocfs_set_timeout(ocfs_timeout *to, __u32 timeout)
+{
+ __u32 how_long;
+
+ if (!timeout) {
+ to->timed_out = 1;
+ return ;
+ }
+
+ how_long = (timeout * HZ / 1000);
+ if (how_long < 1)
+ how_long = 1;
+
+ to->timer.expires = jiffies + how_long;
+ add_timer(&to->timer);
+}
+
+void ocfs_clear_timeout(ocfs_timeout *to)
+{
+ del_timer_sync(&to->timer);
+}
+
void ocfs_truncate_inode_pages(struct inode *inode, loff_t off)
{
LOG_TRACE_ARGS("truncating pages for inode %llu (%p) from offset %llu\n",
@@ -180,7 +250,7 @@
static int ocfs_kernel_text_address(unsigned long addr)
{
- return addr >= 0xc0000000;
+ return (addr >= 0xc0000000 ) ;
}
void ocfs_show_trace(unsigned long * stack)
@@ -205,5 +275,46 @@
}
+int __ocfs_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms)
+{
+ int ret;
+ ocfs_timeout timeout;
+ DECLARE_WAITQUEUE(wait, current);
+ DECLARE_WAITQUEUE(to_wait, current);
+ ocfs_init_timeout(&timeout);
+ if (ms) {
+ ocfs_set_timeout(&timeout, ms);
+ if (timeout.timed_out) {
+ ocfs_clear_timeout(&timeout);
+ }
+ }
+ add_wait_queue(wq, &wait);
+ add_wait_queue(&timeout.wait, &to_wait);
+ do {
+ ret = 0;
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (atomic_read(var)==val)
+ break;
+ ret = -ETIMEDOUT;
+ if (timeout.timed_out)
+ break;
+ schedule();
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ } while (1);
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(wq, &wait);
+ remove_wait_queue(&timeout.wait, &to_wait);
+
+ if (ms)
+ ocfs_clear_timeout(&timeout);
+
+ return ret;
+}
+
+
Modified: branches/endian/src/util.h
===================================================================
--- branches/endian/src/util.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/util.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -26,11 +26,26 @@
#ifndef OCFS2_UTIL_H
#define OCFS2_UTIL_H
+void ocfs_clear_timeout(ocfs_timeout *to);
void ocfs_daemonize(char *name, int len, int shutdown_sigs);
+void ocfs_init_timeout(ocfs_timeout *to);
+void ocfs_set_timeout(ocfs_timeout *to, __u32 timeout);
void ocfs_show_stack(unsigned long *esp);
void ocfs_show_trace(unsigned long *stack);
+int ocfs_sleep(__u32 ms);
void ocfs_truncate_inode_pages(struct inode *inode, loff_t off);
+int __ocfs_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms);
void ocfs_block_sigs(sigset_t *oldsigs, unsigned long mask);
void ocfs_unblock_sigs(sigset_t newsig);
+/* returns 0 when var == val, -ETIMEDOUT on timeout, or -EINTR if a signal is pending */
+static inline int ocfs_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int timeout)
+{
+ int ret = 0;
+ if (atomic_read(var) != val)
+ ret = __ocfs_wait_atomic_eq(wq, var, val, timeout);
+ return ret;
+}
+
+
#endif /* OCFS2_UTIL_H */
Modified: branches/endian/src/vote.c
===================================================================
--- branches/endian/src/vote.c 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/vote.c 2005-01-18 21:41:46 UTC (rev 1789)
@@ -3,9 +3,9 @@
*
* vote.c
*
- * description here
+ * netdlm listener, receive, verify and send messages
*
- * Copyright (C) 2003, 2004 Oracle. All rights reserved.
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -25,830 +25,1125 @@
#include "ocfs_compat.h"
+#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/net.h>
+#include <linux/in.h>
-#include <cluster/util.h>
-#include <cluster/dlmcommon.h>
-#include <cluster/heartbeat.h>
-#include <cluster/nodemanager.h>
-#include <cluster/tcp.h>
-#include <cluster/dlmmod.h>
+#include <asm/uaccess.h>
#include "ocfs_log.h"
#include "ocfs.h"
-#include "ocfs2.h"
-#include "alloc.h"
-#include "dlmglue.h"
-#include "extent_map.h"
-#include "heartbeat.h"
-#include "inode.h"
-#include "slot_map.h"
+#include "dlm.h"
+#include "nm.h"
#include "util.h"
#include "vote.h"
-#include "ocfs_journal.h"
-#include "buffer_head_io.h"
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOTE
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOTE
-#define OCFS2_MESSAGE_TYPE_VOTE (0x1)
-#define OCFS2_MESSAGE_TYPE_RESPONSE (0x2)
-typedef struct _ocfs2_msg_hdr
-{
- u32 h_response_id; /* used to lookup message handle on sending
- * node. */
- u32 h_request;
- u64 h_blkno;
- u32 h_generation;
- u32 h_node_num; /* node sending this particular message. */
-} ocfs2_msg_hdr;
-typedef struct _ocfs2_vote_msg
-{
- ocfs2_msg_hdr v_hdr;
- /* may put stuff in here... */
-} ocfs2_vote_msg;
+ocfs_ipc_ctxt OcfsIpcCtxt;
-typedef struct _ocfs2_response_msg
-{
- ocfs2_msg_hdr r_hdr;
- s32 r_response; /* this maps to '0' or a -value in errno.h */
-} ocfs2_response_msg;
+static const char vote_state_str[] = { 'U', 'S', 'P', 'F', 'D' };
-typedef struct _ocfs2_vote_work {
- struct list_head w_list;
- ocfs2_vote_msg w_msg;
-} ocfs2_vote_work;
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map * votemap);
+static void ocfs_dlm_recv_msg (void *val);
+static int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len);
+static int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg);
+static int ocfs_comm_process_msg (__u8 * msg);
+static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type);
-enum ocfs2_vote_request {
- OCFS2_VOTE_REQ_INVALID = 0,
- OCFS2_VOTE_REQ_DELETE,
- OCFS2_VOTE_REQ_UNLINK,
- OCFS2_VOTE_REQ_RENAME,
- OCFS2_VOTE_REQ_MOUNT,
- OCFS2_VOTE_REQ_UMOUNT
-};
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg);
+static int ocfs_node_map_stringify(ocfs_node_map *map, char **str);
-typedef struct _ocfs2_net_wait_ctxt {
- struct list_head n_list;
- u32 n_response_id;
- wait_queue_head_t n_event;
- ocfs_node_map n_node_map;
- int n_response; /* an agreggate response. 0 if
- * all nodes are go, < 0 on any
- * negative response from any
- * node or network error. */
-} ocfs2_net_wait_ctxt;
-
-static void ocfs2_vote_thread_do_work(ocfs_super *osb);
-static void ocfs2_process_vote(ocfs_super *osb,
- ocfs2_vote_msg *msg);
-static int ocfs2_do_request_vote(ocfs_super *osb,
- u64 blkno,
- unsigned int generation,
- enum ocfs2_vote_request type);
-
-static void ocfs2_process_mount_request(ocfs_super *osb,
- unsigned int node_num)
+static void ocfs_put_vote_obj(ocfs_vote_obj *obj)
{
- LOG_TRACE_ARGS("MOUNT vote from node %u\n", node_num);
- /* The other node only sends us this message when he has an EX
- * on the superblock, so our recovery threads (if having been
- * launched) are waiting on it.*/
- ocfs_recovery_map_clear(osb, node_num);
- ocfs_node_map_set_bit(osb, &osb->mounted_map, node_num);
+ if (atomic_dec_and_test(&obj->refcount))
+ kfree(obj);
}
-static void ocfs2_process_umount_request(ocfs_super *osb,
- unsigned int node_num)
+static void ocfs_get_vote_obj(ocfs_vote_obj *obj)
{
- LOG_TRACE_ARGS("UMOUNT vote from node %u\n", node_num);
- ocfs_node_map_clear_bit(osb, &osb->mounted_map, node_num);
- ocfs_node_map_set_bit(osb, &osb->umount_map, node_num);
+ atomic_inc(&obj->refcount);
}
-static int ocfs2_process_delete_request(struct inode *inode)
+
+/*
+ * ocfs_recv_udp_msg()
+ *
+ */
+int ocfs_recv_udp_msg (ocfs_recv_ctxt * recv_ctxt)
{
- int response = -EBUSY;
+ int status = -ENETUNREACH, error;
+ mm_segment_t oldfs;
+ struct sockaddr_in sin;
+ struct iovec iov = {
+ .iov_len = recv_ctxt->msg_len,
+ .iov_base = recv_ctxt->msg
+ };
+ struct msghdr msg = {
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_iovlen = 1,
+ .msg_iov = &iov,
+ .msg_name = (struct sockaddr *) &sin,
+ .msg_namelen = sizeof (sin),
+ .msg_flags = 0
+ };
- LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
- "lnk_cnt = %u\n", inode->i_ino,
- inode->i_nlink);
+ LOG_ENTRY ();
- /* force this as ours may be out of date. */
- inode->i_nlink = 0;
+ /* Initialize the work item with our worker routine; it is queued below once a message has been received. */
+ INIT_WORK (&recv_ctxt->ipc_wq, ocfs_dlm_recv_msg, recv_ctxt);
- spin_lock(&OCFS_I(inode)->ip_lock);
- /* vote no if the file is still open. */
- if (OCFS_I(inode)->ip_open_cnt > 0) {
- LOG_TRACE_PROCESS_VOTE("open count = %u\n",
- OCFS_I(inode)->ip_open_cnt);
- spin_unlock(&OCFS_I(inode)->ip_lock);
- goto done;
- }
- spin_unlock(&OCFS_I(inode)->ip_lock);
+ memset (&sin, 0, sizeof (sin));
+ oldfs = get_fs ();
+ set_fs (get_ds ());
+ error = sock_recvmsg (OcfsIpcCtxt.recv_sock, &msg, recv_ctxt->msg_len, msg.msg_flags);
+ set_fs (oldfs);
- /* directories are a bit ugly... What if someone is sitting in
- * it? We want to make sure the inode is removed completely as
- * a result of the iput in process_vote. */
- if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
- LOG_TRACE_PROCESS_VOTE("i_count = %u\n",
- atomic_read(&inode->i_count));
- goto done;
+ if (error < 0) {
+ if (error == -ERESTARTSYS) {
+ status = -EBADF;
+ LOG_TRACE_STR ("Shutting down ocfs2lsnr");
+ } else {
+ status = -ENETUNREACH;
+ LOG_ERROR_ARGS ("unable to recvmsg, error=%d", error);
+ LOG_ERROR_STATUS (status);
+ }
+ goto bail;
+ } else if (msg.msg_namelen) {
+ recv_ctxt->msg_len = iov.iov_len;
+ status = 0;
}
- /* If we get here, then we're voting 'yes', so commit the
- * delete on our side. */
- response = 0;
+ LOG_TRACE_ARGS ("Received packet from: %d.%d.%d.%d\n",
+ NIPQUAD (sin.sin_addr.s_addr));
- spin_lock(&OCFS_I(inode)->ip_lock);
- SET_INODE_DELETED(inode);
- /* We set the SKIP_DELETE flag on the inode so we don't try to
- * delete it in delete_inode ourselves. */
- OCFS_I(inode)->ip_flags |= OCFS_INODE_SKIP_DELETE;
- spin_unlock(&OCFS_I(inode)->ip_lock);
+ if (status == 0)
+ schedule_work(&recv_ctxt->ipc_wq);
- d_prune_aliases (inode);
+bail:
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_recv_udp_msg */
- /* TODO: How much of this is really necessary? */
- sync_mapping_buffers(inode->i_mapping);
- ocfs_truncate_inode_pages(inode, 0);
- ocfs2_extent_map_trunc(inode, 0);
-done:
- return response;
-}
-
-static void ocfs2_process_dentry_request(struct inode *inode,
- int rename)
+static inline int ocfs2_comm_ip_version_to_family(u16 ip_version)
{
- d_prune_aliases (inode);
+ switch (ntohs(ip_version)) {
+ case 4:
+ return PF_INET;
+ case 6:
+ return PF_INET6;
+ default:
+ BUG();
+ }
- /* for rename, we don't drop link counts */
- if (!rename) {
- if (S_ISDIR(inode->i_mode))
- inode->i_nlink = 0;
- else
- inode->i_nlink--;
- }
+ return 4;
}
-static void ocfs2_process_vote(ocfs_super *osb,
- ocfs2_vote_msg *msg)
+/*
+ * ocfs_send_bcast()
+ *
+ */
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg)
{
- int net_status, vote_response;
- int rename = 0;
- unsigned int node_num, generation;
- u64 blkno;
- enum ocfs2_vote_request request;
- struct inode *inode = NULL;
- struct inode *remote_node;
- ocfs2_msg_hdr *hdr = &msg->v_hdr;
- ocfs2_response_msg response;
+ int status = 0, error;
+ __s16 num;
+ BARF_BARF_BARF *node;
+ struct sockaddr_in sin;
+ mm_segment_t oldfs;
- /* decode the network mumbo jumbo into local variables. */
- request = ntohl(hdr->h_request);
- blkno = be64_to_cpu(hdr->h_blkno);
- generation = ntohl(hdr->h_generation);
- node_num = ntohl(hdr->h_node_num);
+ LOG_ENTRY ();
- LOG_TRACE_ARGS("ocfs2: processing vote: request = %u, blkno = %llu, "
- "generation = %u, node_num = %u\n", request, blkno,
- generation, node_num);
+ oldfs = get_fs ();
+ for (num=0; num<osb->max_nodes; num++) {
+ if (num == osb->node_num)
+ continue;
- vote_response = 0;
+ if (!ocfs_node_map_test_bit(votemap, num))
+ continue;
- switch (request) {
- case OCFS2_VOTE_REQ_UMOUNT:
- ocfs2_process_umount_request(osb, node_num);
- goto respond;
- case OCFS2_VOTE_REQ_MOUNT:
- ocfs2_process_mount_request(osb, node_num);
- goto respond;
- default:
- /* avoids a gcc warning */
- break;
+ node = osb->node_cfg_info[num];
+ if (!node)
+ continue;
+
+ LOG_TRACE_ARGS("Sending msg to node=%u, name=%s\n",
+ num, node->node_name);
+ memset (&sin, 0, sizeof (sin));
+ sin.sin_family = ocfs2_comm_ip_version_to_family(node->ipc_config.ip_version);
+ sin.sin_addr.s_addr = node->ipc_config.addr_u.ip_addr4;
+ sin.sin_port = node->ipc_config.ip_port;
+
+ LOG_TRACE_ARGS("about to send to 0x%08u:%u\n",
+ ntohl(node->ipc_config.addr_u.ip_addr4),
+ ntohs(node->ipc_config.ip_port));
+
+ status = -ENETUNREACH;
+ if (OcfsIpcCtxt.send_sock) {
+ struct iovec iov = {
+ .iov_base = dlm_msg,
+ .iov_len = dlm_msg->msg_len
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_name = (struct sockaddr *) &sin,
+ .msg_namelen = sizeof (sin),
+ .msg_flags = 0
+ };
+
+ status = 0;
+ set_fs (get_ds ());
+ error = sock_sendmsg (OcfsIpcCtxt.send_sock, &msg, dlm_msg->msg_len);
+ set_fs (oldfs);
+
+ if (error < 0) {
+ LOG_ERROR_ARGS ("unable to sendmsg, error=%d", error);
+ status = -ENETUNREACH;
+ }
+ }
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
}
- /* We cannot process the remaining message types before we're
- * fully mounted. It's perfectly safe however to send a 'yes'
- * response as we can't possibly have any of the state they're
- * asking us to modify yet. */
- if (atomic_read(&osb->vol_state) == VOLUME_INIT)
- goto respond;
+ status = 0;
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_send_bcast */
- /* If we get here, then the request is against an inode. */
- inode = ocfs_ilookup(osb, blkno);
- if (!inode) {
- vote_response = -EINVAL;
- goto respond;
+/*
+ * ocfs_init_udp_sock()
+ *
+ */
+int ocfs_init_udp_sock(struct socket **send_sock,
+ struct socket **recv_sock)
+{
+ struct sockaddr_in sin;
+ int status;
+ ocfs_comm_info *comm;
+
+ LOG_ENTRY ();
+
+ comm = &(OcfsGlobalCtxt.comm_info);
+
+ /* Create Send Socket */
+ status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+ SOCK_DGRAM, IPPROTO_UDP,
+ send_sock);
+ if (status < 0) {
+ LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
+ goto bail;
}
- OCFS_ASSERT(inode->i_generation == generation);
+ /* Bind Send Socket */
+ memset(&sin, 0, sizeof (sin));
+ sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
+ sin.sin_addr.s_addr = htonl (INADDR_ANY);
+ sin.sin_port = htons(0);
- switch (request) {
- case OCFS2_VOTE_REQ_DELETE:
- vote_response = ocfs2_process_delete_request(inode);
- break;
- case OCFS2_VOTE_REQ_RENAME:
- rename = 1;
- /* fall through */
- case OCFS2_VOTE_REQ_UNLINK:
- ocfs2_process_dentry_request(inode, rename);
- break;
- default:
- LOG_ERROR_ARGS("node %u, invalid request: %u\n", node_num,
- request);
- vote_response = -EINVAL;
+ status = (*send_sock)->ops->bind(*send_sock,
+ (struct sockaddr *)&sin,
+ sizeof(sin));
+ if (status < 0) {
+ LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
+ goto bail;
}
-respond:
- /* Response struture is small so we just put it on the stack
- * and stuff it inline. */
- memset(&response, 0, sizeof(ocfs2_response_msg));
- response.r_hdr.h_response_id = hdr->h_response_id;
- response.r_hdr.h_blkno = hdr->h_blkno;
- response.r_hdr.h_generation = hdr->h_generation;
- response.r_hdr.h_node_num = htonl(osb->node_num);
- response.r_response = htonl(vote_response);
+ /* Create Receive Socket */
+ status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+ SOCK_DGRAM, IPPROTO_UDP,
+ recv_sock);
+ if (status < 0) {
+ LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
+ goto bail;
+ }
- remote_node = nm_get_node_by_num(node_num);
- if (!remote_node) {
- LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
- } else {
- net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
- osb->net_key,
- &response,
- sizeof(ocfs2_response_msg),
- remote_node,
- NULL);
- if (net_status < 0)
- LOG_ERROR_ARGS("message to node %u fails with error "
- "%d!\n", node_num, net_status);
- iput(remote_node);
+
+ /* Bind Receive Socket */
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = comm->ip_port;
+
+ status = (*recv_sock)->ops->bind(*recv_sock,
+ (struct sockaddr *)&sin,
+ sizeof(sin));
+ if (status < 0) {
+ LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
+ goto bail;
}
- if (inode)
- iput(inode);
-}
+bail:
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_init_udp_sock */
-static void ocfs2_vote_thread_do_work(ocfs_super *osb)
+
+/*
+ * ocfs_send_vote_reply()
+ *
+ */
+int ocfs_send_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 vote_status)
{
- unsigned long processed;
- ocfs2_lock_res *lockres;
- ocfs2_vote_work *work;
+ ocfs_dlm_req_master *req_master;
+ ocfs_dlm_reply_master *reply_master;
+ ocfs_dlm_msg *send_dlm_msg;
+ ocfs_vote_obj *obj;
+ int status = 0;
+ __u8 *buf;
+ __u32 msg_len, obj_len;
+ ocfs_node_map vote_map;
- LOG_ENTRY();
+ LOG_ENTRY ();
- spin_lock(&osb->vote_task_lock);
- processed = osb->blocked_lock_count;
- while (processed) {
- OCFS_ASSERT(!list_empty(&osb->blocked_lock_list));
+ ocfs_node_map_init(osb, &vote_map);
- lockres = list_entry(osb->blocked_lock_list.next,
- ocfs2_lock_res, l_blocked_list);
- list_del_init(&lockres->l_blocked_list);
- osb->blocked_lock_count--;
- spin_unlock(&osb->vote_task_lock);
+ req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf;
- OCFS_ASSERT(processed);
- processed--;
+ msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_reply_master);
+ obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_reply_master);
- ocfs2_process_blocked_lock(osb, lockres);
-
- spin_lock(&osb->vote_task_lock);
+ obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
+ if (obj == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto finally;
}
+ buf = (__u8 *)&(obj->m);
+ send_dlm_msg = (ocfs_dlm_msg *)buf;
+ reply_master = (ocfs_dlm_reply_master *) send_dlm_msg->msg_buf;
+
+ ocfs_init_dlm_msg (osb, send_dlm_msg, msg_len, OCFS_VOTE_REPLY);
- while (osb->vote_count) {
- OCFS_ASSERT(!list_empty(&osb->vote_list));
- work = list_entry(osb->vote_list.next,
- ocfs2_vote_work, w_list);
- list_del(&work->w_list);
- osb->vote_count--;
- spin_unlock(&osb->vote_task_lock);
+ reply_master->h.lock_id = req_master->lock_id;
+ reply_master->status = vote_status;
+ reply_master->h.lock_seq_num = req_master->lock_seq_num;
+ reply_master->h.flags = req_master->flags;
- ocfs2_process_vote(osb, &work->w_msg);
- kfree(work);
+ ocfs_node_map_set_bit(&vote_map, dlm_msg->src_node);
+ ocfs_node_map_set(&obj->req_vote_map, &vote_map);
- spin_lock(&osb->vote_task_lock);
- }
- spin_unlock(&osb->vote_task_lock);
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_add_tail(&obj->list, &osb->vote_obj_queue);
+ spin_unlock(&osb->vote_obj_queue_lock);
- LOG_EXIT();
-}
+ ocfs_send_bcast (osb, &vote_map, send_dlm_msg);
+ spin_lock (&obj->lock);
+ obj->vote_state = VOTE_OBJ_STATE_SENT;
+ spin_unlock (&obj->lock);
-static inline int ocfs2_vote_thread_has_work(ocfs_super *osb)
-{
- if (list_empty(&osb->blocked_lock_list) &&
- list_empty(&osb->vote_list))
- return 0;
+ // silly ;-)
+ spin_lock (&obj->lock);
+ obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
+ spin_unlock (&obj->lock);
- return 1;
-}
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_del(&obj->list);
+ spin_unlock(&osb->vote_obj_queue_lock);
-int ocfs2_vote_thread(void *arg)
+finally:
+ ocfs_put_vote_obj (obj);
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_send_vote_reply */
+
+
+/*
+ * ocfs_check_ipc_msg()
+ *
+ */
+int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len)
{
- int status = 0;
- ocfs_super *osb = arg;
- char proc[16];
+ int ret = 0;
+ ocfs_dlm_msg *dlm_msg;
- sprintf (proc, "ocfs2vote-%d", osb->osb_id);
- ocfs_daemonize (proc, strlen(proc), 0);
+ LOG_ENTRY ();
- spin_lock(&osb->vote_task_lock);
- osb->vote_task = current;
- init_completion (&osb->vote_event_complete);
+ dlm_msg = (ocfs_dlm_msg *) msg;
- complete(&osb->vote_event_init);
+ if (dlm_msg == NULL) {
+ LOG_TRACE_STR("Null netdlm message");
+ goto bail;
+ }
- while (1) {
- if (osb->vote_exit) {
- if (!ocfs2_vote_thread_has_work(osb))
- break;
- /* don't want to sleep if we're supposed to quit. */
- atomic_set(&osb->wake_vote_task, 1);
- }
- spin_unlock(&osb->vote_task_lock);
+ if (msg_len < sizeof(ocfs_dlm_msg)) {
+ LOG_TRACE_STR("Netdlm message too short");
+ goto bail;
+ }
- wait_event_interruptible(osb->vote_event,
- atomic_read(&osb->wake_vote_task));
+ /* Verify the magic number (no checksum is computed here) */
+ if (dlm_msg->magic != OCFS_DLM_MSG_MAGIC) {
+ LOG_TRACE_ARGS ("Magic number mismatch in netdlm message: "
+ "0x%08x != 0x%08x\n",
+ dlm_msg->magic, OCFS_DLM_MSG_MAGIC);
+ goto bail;
+ }
- atomic_set(&osb->wake_vote_task, 0);
- LOG_TRACE_STR("vote_thread: awoken");
+ ret = 1;
- ocfs2_vote_thread_do_work(osb);
- spin_lock(&osb->vote_task_lock);
- }
+bail:
+ LOG_EXIT_INT (ret);
+ return ret;
+} /* ocfs_check_ipc_msg */
- osb->vote_task = NULL;
- spin_unlock(&osb->vote_task_lock);
- complete(&osb->vote_event_complete);
+int ocfs_lookup_obj_for_proc (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
+{
+ int status = -ENOENT;
+ ocfs_dlm_msg *dlm_msg = NULL;
+ ocfs_dlm_msg_hdr *request = NULL;
+ ocfs_dlm_reply_master *reply = NULL;
+ int *len = data->u.proc.len;
+ int max = data->u.proc.max - *len;
+ char *p = data->u.proc.page + *len;
+ int ret = 0;
+ char *reqstr=NULL, *gotstr=NULL;
+ /* just run thru everything to populate /proc */
+ /* return -ENOENT to keep going */
+ dlm_msg = &(obj->m);
+
+ switch (dlm_msg->msg_type) {
+ case OCFS_VOTE_REQUEST:
+ request = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
+
+ if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+ break;
+ if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0)
+ break;
+ ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s\n",
+ obj->pid,
+ vote_state_str[obj->vote_state],
+ obj->vote_status,
+ reqstr,
+ request->lock_id,
+ request->lock_seq_num,
+ request->flags, gotstr);
+ break;
+ case OCFS_VOTE_REPLY:
+ reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
+ if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+ break;
+ ret = snprintf(p, max, "REPLY: %d %c %3d %s %21llu %21llu %08x | %3d\n",
+ obj->pid,
+ vote_state_str[obj->vote_state],
+ obj->vote_status,
+ reqstr,
+ reply->h.lock_id,
+ reply->h.lock_seq_num,
+ reply->h.flags,
+ reply->status);
+
+ break;
+ case OCFS_INFO_DISMOUNT:
+ ret = snprintf(p, max, "UNMNT: %d\n", obj->pid);
+ break;
+ default:
+ ret = snprintf(p, max, "BAD!!: %d\n", obj->pid);
+ break;
+ }
+ (*len) += ret;
+ p[max-1] = '\0';
+
+ if (reqstr)
+ kfree(reqstr);
+ if (gotstr)
+ kfree(gotstr);
return status;
}
-static ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(ocfs_super *osb,
- unsigned int response_id)
+
+int ocfs_lookup_obj_by_lockid (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
{
- ocfs2_net_wait_ctxt *w;
+ int status = 0;
+ ocfs_dlm_msg *dlm_msg = NULL;
+ ocfs_dlm_msg_hdr *req = NULL;
- w = kmalloc(sizeof(*w), GFP_KERNEL);
- if (!w) {
- LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
+ dlm_msg = &(obj->m);
+ req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
+ if (dlm_msg->msg_type != OCFS_VOTE_REQUEST ||
+ obj->vote_state == VOTE_OBJ_STATE_DESTROYING ||
+ req->lock_id != data->u.s.lock_id) {
+ status = -ENOENT;
}
- memset(w, 0, sizeof(*w));
-
- INIT_LIST_HEAD(&w->n_list);
- init_waitqueue_head(&w->n_event);
- ocfs_node_map_init(osb, &w->n_node_map);
- w->n_response_id = response_id;
-bail:
- return w;
+ return status;
}
-static unsigned int ocfs2_new_response_id(ocfs_super *osb)
+static int ocfs_lookup_obj_by_seq (ocfs_vote_obj *obj, ocfs_vote_obj_lookup_data *data)
{
- unsigned int ret;
+ int status = -ENOENT;
+ ocfs_dlm_msg *dlm_msg = NULL;
+ ocfs_dlm_msg_hdr *req = NULL;
- spin_lock(&osb->net_response_lock);
- ret = ++osb->net_response_ids;
- spin_unlock(&osb->net_response_lock);
-
- return ret;
+ if (obj->seq_num == data->u.s.seq_num) {
+ status = 0;
+ dlm_msg = &(obj->m);
+ req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
+ // error if there is a non-request with a matching seqnum, or
+ // a vote object that is in too early or too late a state, or
+ // a vote object with the right seqnum but wrong lockid
+ if (dlm_msg->msg_type != OCFS_VOTE_REQUEST ||
+ obj->vote_state == VOTE_OBJ_STATE_DESTROYING ||
+ obj->vote_state == VOTE_OBJ_STATE_UNSENT ||
+ req->lock_id != data->u.s.lock_id) {
+ LOG_ERROR_ARGS("bad message: vote_state=%d type=%d "
+ "lockid=%llu expected=%llu\n",
+ obj->vote_state, dlm_msg->msg_type,
+ req->lock_id, data->u.s.lock_id);
+ status = -EINVAL;
+ }
+ }
+ return status;
}
-static void ocfs2_dequeue_net_wait_ctxt(ocfs_super *osb,
- ocfs2_net_wait_ctxt *w)
+/*
+ * returns a status code; if data->ret is set, *data->ret receives an ocfs_vote_obj with a ref on it, or NULL
+ */
+int ocfs_lookup_vote_request_obj (ocfs_super *osb, ocfs_vote_obj_lookup_data *data)
{
- spin_lock(&osb->net_response_lock);
- list_del(&w->n_list);
- spin_unlock(&osb->net_response_lock);
-}
+ int status = -ENOENT;
+ struct list_head *iter;
+ ocfs_vote_obj *obj = NULL;
-static void ocfs2_queue_net_wait_ctxt(ocfs_super *osb,
- ocfs2_net_wait_ctxt *w)
-{
- spin_lock(&osb->net_response_lock);
- list_add_tail(&w->n_list,
- &osb->net_response_list);
- spin_unlock(&osb->net_response_lock);
+ spin_lock(&osb->vote_obj_queue_lock);
+
+ list_for_each (iter, &osb->vote_obj_queue) {
+ obj = list_entry (iter, ocfs_vote_obj, list);
+ ocfs_get_vote_obj (obj);
+ spin_lock(&obj->lock);
+ status = data->func(obj, data);
+ spin_unlock(&obj->lock);
+ if (status < 0) {
+ ocfs_put_vote_obj (obj);
+ obj = NULL;
+ }
+ if (status != -ENOENT)
+ break;
+ obj = NULL;
+ }
+
+ spin_unlock(&osb->vote_obj_queue_lock);
+
+ // return the obj, or drop the ref
+ if (data->ret)
+ *(data->ret) = obj;
+ else if (obj)
+ ocfs_put_vote_obj (obj);
+ return status;
}
-#define OCFS2_RESPONSE_WAIT_JIFFIES (30 * HZ)
-static int ocfs2_wait_on_vote_responses(ocfs_super *osb,
- ocfs2_net_wait_ctxt *w)
+
+/*
+ * ocfs_comm_process_vote_reply()
+ *
+ * Handles one incoming vote reply.  With osb->comm_lock held for the whole
+ * call, it looks up the queued request whose sequence number / lock id
+ * match the reply header (via ocfs_lookup_obj_by_seq), folds the reply
+ * into that object's got_vote_map / vote_status, advances vote_state to
+ * PARTIAL_REPLY or FULL_REPLY, and wakes the waiter on completion or
+ * error.
+ *
+ * Returns 0 on success, the lookup status if no request object was found,
+ * or -EINVAL if the object is not in the SENT or PARTIAL_REPLY state.
+ */
+int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg)
{
int status = 0;
- signed long timeout = OCFS2_RESPONSE_WAIT_JIFFIES;
- DECLARE_WAITQUEUE(wait, current);
+ ocfs_dlm_reply_master *reply;
+ ocfs_dlm_msg_hdr *reply_msg;
+ ocfs_vote_reply_ctxt ctxt;
+ ocfs_vote_obj *obj = NULL;
+ ocfs_vote_obj_lookup_data data;
- if (ocfs_node_map_is_empty(osb, &w->n_node_map))
- return 0;
+ LOG_ENTRY ();
- add_wait_queue(&w->n_event, &wait);
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ down (&(osb->comm_lock));
- if (ocfs_node_map_is_empty(osb, &w->n_node_map))
- break;
+ reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
+ reply_msg = &(reply->h);
- if (!signal_pending(current)) {
- timeout = schedule_timeout(timeout);
- if (!timeout) {
- status = -ETIMEDOUT;
- break;
- }
- continue;
- }
- status = -ERESTARTSYS;
- break;
+ /* find the original request object for this reply */
+ data.u.s.seq_num = reply_msg->lock_seq_num;
+ data.u.s.lock_id = reply_msg->lock_id;
+ data.func = ocfs_lookup_obj_by_seq;
+ data.ret = &obj;
+ status = ocfs_lookup_vote_request_obj (osb, &data);
+ if (status < 0 || obj==NULL) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
}
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&w->n_event, &wait);
+ /* obj now carries the reference taken by the lookup; it is dropped
+ * by the ocfs_put_vote_obj() call after the unlock label */
+ spin_lock(&obj->lock);
+ if (obj->vote_state != VOTE_OBJ_STATE_SENT &&
+ obj->vote_state != VOTE_OBJ_STATE_PARTIAL_REPLY) {
+ LOG_ERROR_ARGS("bad vote reply state=%d, node=%u, lockid=%llu, seq=%llu, vote=%d\n",
+ obj->vote_state, dlm_msg->src_node,
+ reply_msg->lock_id,
+ reply_msg->lock_seq_num, reply->status);
+ status = -EINVAL;
+ goto unlock;
+ }
+
+ LOG_TRACE_ARGS("node=%u, lockid=%llu, seq=%llu, vote=%d\n",
+ dlm_msg->src_node, reply_msg->lock_id,
+ reply_msg->lock_seq_num, reply->status);
+
+ /* the context points straight at the object's map and status, so
+ * ocfs_process_one_vote_reply() updates the object in place */
+ ctxt.got_vote_map = &(obj->got_vote_map);
+ ctxt.status = &(obj->vote_status);
+ ctxt.flags = reply_msg->flags;
+ ctxt.reply = reply;
+
+ ocfs_process_one_vote_reply(osb, &ctxt, dlm_msg->src_node);
+
+ /* complete once every requested node has been recorded as voted */
+ if (ocfs_node_map_is_equal(&obj->got_vote_map, &obj->req_vote_map))
+ obj->vote_state = VOTE_OBJ_STATE_FULL_REPLY;
+ else
+ obj->vote_state = VOTE_OBJ_STATE_PARTIAL_REPLY;
+
+unlock:
+ // wake if complete or error
+ if (obj->vote_status < 0 || status < 0 ||
+ obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
+ atomic_set (&obj->voted_event_woken, 1);
+ wake_up (&obj->voted_event);
+ }
+ spin_unlock(&obj->lock);
+ ocfs_put_vote_obj (obj);
+
+bail:
+ up (&(osb->comm_lock));
+ LOG_EXIT_STATUS (status);
return status;
-}
+} /* ocfs_comm_process_vote_reply */
-static int ocfs2_broadcast_vote(ocfs_super *osb,
- ocfs2_vote_msg *request,
- unsigned int response_id)
+/*
+ * ocfs_dlm_recv_msg()
+ *
+ * val is an ocfs_recv_ctxt.  If the context's status is non-negative and
+ * ocfs_check_ipc_msg() accepts the packet, the packet is handed to
+ * ocfs_comm_process_msg() (whose return value is ignored here).  The
+ * context is always released with kfree() before returning.
+ */
+void ocfs_dlm_recv_msg (void *val)
{
- int status, i, remote_err;
- ocfs2_net_wait_ctxt *w = NULL;
- struct inode *remote_node;
- int dequeued = 0;
+ ocfs_recv_ctxt *recv_ctxt;
+ __u8 *dlm_packet;
- LOG_ENTRY();
+ LOG_ENTRY ();
- w = ocfs2_new_net_wait_ctxt(osb, response_id);
- if (!w) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto bail;
+ recv_ctxt = (ocfs_recv_ctxt *) val;
+ dlm_packet = (__u8 *) recv_ctxt->msg;
+
+ if (recv_ctxt->status >= 0) {
+ if (ocfs_check_ipc_msg (dlm_packet, recv_ctxt->msg_len))
+ ocfs_comm_process_msg (dlm_packet);
}
- /* we're pretty much ready to go at this point, and this fills
- * in n_response which we need anyway... */
- ocfs2_queue_net_wait_ctxt(osb, w);
+ /* NOTE(review): dlm_packet points at recv_ctxt->msg; presumably that
+ * storage lives inside recv_ctxt - confirm against the allocator */
+ kfree(recv_ctxt);
- i = ocfs_node_map_iterate(osb, &osb->mounted_map, 0);
+ LOG_EXIT ();
+ return;
+} /* ocfs_dlm_recv_msg */
- while (i != OCFS_INVALID_NODE_NUM) {
- if (i != osb->node_num) {
- LOG_TRACE_ARGS("trying to send request to node %i\n",
- i);
- ocfs_node_map_set_bit(osb, &w->n_node_map, i);
+/*
+ * ocfs_comm_process_msg()
+ *
+ * Dispatches one received message.  The target volume is found by
+ * comparing the message's vol_id against the uuid of every osb on
+ * OcfsGlobalCtxt.osb_next (under global_res).  Messages for an unknown
+ * volume, from a source node >= osb->max_nodes, or from a node whose bit
+ * is not set in osb->publ_map are dropped and 0 is returned.
+ *
+ * Only OCFS_VOTE_REQUEST propagates a status (from ocfs_process_vote);
+ * the result of ocfs_comm_process_vote_reply() is discarded.
+ */
+int ocfs_comm_process_msg (__u8 * msg)
+{
+ int status = 0;
+ ocfs_super *osb = NULL;
+ ocfs_dlm_msg *dlm_msg;
+ ocfs_dlm_req_master *req_master;
+ struct list_head *iter_osb, *temp_iter;
+ __s16 src_node;
- remote_node = nm_get_node_by_num(i);
- if (!remote_node) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+ LOG_ENTRY ();
- remote_err = 0;
- status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
- osb->net_key,
- request,
- sizeof(*request),
- remote_node,
- &remote_err);
- iput(remote_node);
- if (status == -ETIMEDOUT) {
- LOG_TRACE_ARGS("remote node %d timed out!\n",
- i);
- status = -EAGAIN;
- goto bail;
- }
- if (remote_err < 0) {
- status = remote_err;
- LOG_TRACE_ARGS("remote error %d on node %d!\n",
- remote_err, i);
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- }
- i++;
- i = ocfs_node_map_iterate(osb, &osb->mounted_map, i);
- LOG_TRACE_ARGS("next is %d, i am %d\n", i, osb->node_num);
+ dlm_msg = (ocfs_dlm_msg *) msg;
+
+ /* locate the mounted volume this message belongs to; osb is reset
+ * to NULL on every miss so it is NULL if the loop finds nothing */
+ down (&(OcfsGlobalCtxt.global_res));
+ list_for_each_safe (iter_osb, temp_iter, &(OcfsGlobalCtxt.osb_next)) {
+ osb = list_entry (iter_osb, ocfs_super, osb_next);
+ if (!memcmp (osb->uuid, dlm_msg->vol_id,
+ MAX_VOL_ID_LENGTH))
+ break;
+ osb = NULL;
}
- LOG_TRACE_STR("done sending, now waiting on responses...");
+ up (&(OcfsGlobalCtxt.global_res));
+
+ if (osb == NULL) {
+ LOG_ERROR_STR("Ignoring netdlm message with invalid volume id");
+ goto bail;
+ }
- status = ocfs2_wait_on_vote_responses(osb, w);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
+ /* range-check the sender before it is used as a bitmap/array index */
+ if (dlm_msg->src_node >= osb->max_nodes) {
+ LOG_ERROR_ARGS ("Invalid source node in netdlm message: %d\n",
+ dlm_msg->src_node);
goto bail;
}
- ocfs2_dequeue_net_wait_ctxt(osb, w);
- dequeued = 1;
- status = w->n_response;
+ if (!ocfs_node_map_test_bit(&osb->publ_map, dlm_msg->src_node)) {
+ LOG_TRACE_STR("Ignoring netdlm message from dead node");
+ goto bail;
+ }
+
+ switch (dlm_msg->msg_type) {
+ case OCFS_VOTE_REQUEST:
+ status = ocfs_process_vote (osb, dlm_msg);
+ break;
+
+ case OCFS_VOTE_REPLY:
+ ocfs_comm_process_vote_reply (osb, dlm_msg);
+ break;
+
+ case OCFS_INFO_DISMOUNT:
+ src_node = dlm_msg->src_node;
+ /* req_master is assigned but not read anywhere in this function */
+ req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf;
+ printk ("ocfs2: Received dismount message for device (%u,%u) "
+ "from %s (node %d)\n", MAJOR(osb->sb->s_dev),
+ MINOR(osb->sb->s_dev), osb->node_cfg_info[src_node]->node_name,
+ src_node);
+ atomic_set (&(osb->vol_node_map[src_node].dismount), 1);
+ break;
+
+ default:
+ break;
+ }
+
bail:
- if (w) {
- if (!dequeued)
- ocfs2_dequeue_net_wait_ctxt(osb, w);
- kfree(w);
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_comm_process_msg */
+
+
+
+/*
+ * ocfs_send_dismount_msg()
+ *
+ * Broadcasts an OCFS_INFO_DISMOUNT message to every node set in
+ * osb->publ_map except this node.  A temporary vote object is allocated
+ * to carry the message, queued on osb->vote_obj_queue for the duration
+ * of the send, then dequeued and released.  No reply is waited for.
+ *
+ * Returns 0, or -ENOMEM if the vote object cannot be allocated.
+ */
+int ocfs_send_dismount_msg (ocfs_super * osb)
+{
+ int status = 0;
+ ocfs_dlm_msg *dlm_msg = NULL;
+ ocfs_dlm_msg_hdr *req;
+ ocfs_vote_obj *obj;
+ __u32 msg_len, obj_len;
+ ocfs_node_map map;
+
+ LOG_ENTRY_ARGS ("(osb=0x%p)\n", osb);
+
+ /* recipients: every published node except ourselves */
+ ocfs_node_map_dup(osb, &map, &osb->publ_map);
+ ocfs_node_map_clear_bit(&map, osb->node_num);
+
+ msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
+ obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
+
+ obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
+ if (obj == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto finally;
}
+ dlm_msg = &(obj->m);
+ req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
+ ocfs_init_dlm_msg (osb, dlm_msg, msg_len, OCFS_INFO_DISMOUNT);
+ req->lock_id = 0;
+ req->flags = 0;
+ req->lock_seq_num = 0;
- LOG_EXIT_STATUS(status);
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_add_tail(&obj->list, &osb->vote_obj_queue);
+ spin_unlock(&osb->vote_obj_queue_lock);
+
+ ocfs_send_bcast (osb, &map, dlm_msg);
+ spin_lock (&obj->lock);
+ obj->vote_state = VOTE_OBJ_STATE_SENT;
+ spin_unlock (&obj->lock);
+
+ // silly ;-)
+ spin_lock (&obj->lock);
+ obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
+ spin_unlock (&obj->lock);
+
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_del(&obj->list);
+ spin_unlock(&osb->vote_obj_queue_lock);
+
+finally:
+ /* the allocation-failure path arrives here with obj == NULL; do not
+ * hand a NULL object to ocfs_put_vote_obj() */
+ if (obj != NULL)
+ ocfs_put_vote_obj (obj);
+ LOG_EXIT_STATUS (status);
return status;
+} /* ocfs_send_dismount_msg */
+
+/*
+ * ocfs_init_dlm_msg()
+ *
+ * Fills in the common fields of an outgoing message: magic, total
+ * length, message type, this node's number as the source, and the
+ * volume uuid as the volume id.  The payload in msg_buf is not touched.
+ */
+static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type)
+{
+ LOG_ENTRY ();
+
+ /* identify the volume and the sender first ... */
+ memcpy (dlm_msg->vol_id, osb->uuid, MAX_VOL_ID_LENGTH);
+ dlm_msg->src_node = osb->node_num;
+
+ /* ... then describe the message itself */
+ dlm_msg->magic = OCFS_DLM_MSG_MAGIC;
+ dlm_msg->msg_type = type;
+ dlm_msg->msg_len = msg_len;
+
+ LOG_EXIT ();
+} /* ocfs_init_dlm_msg */
+
+
+/*
+ * ocfs_alloc_vote_obj()
+ *
+ * Allocates and zeroes a vote object of "bytes" bytes and initialises it
+ * in the UNSENT state with a reference count of 1.  The requested-vote
+ * map is copied from votemap when one is given, otherwise it starts
+ * empty; the got-vote map always starts empty.  The caller's pid is
+ * recorded in the object.
+ *
+ * Returns the new object, or NULL if ocfs_malloc() fails.
+ */
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map *votemap)
+{
+ ocfs_vote_obj *vobj = ocfs_malloc (bytes);
+
+ if (!vobj)
+ return NULL;
+ memset(vobj, 0, bytes);
+
+ /* state, locking and wait-queue plumbing */
+ vobj->vote_state = VOTE_OBJ_STATE_UNSENT;
+ spin_lock_init (&vobj->lock);
+ atomic_set(&vobj->refcount, 1);
+ atomic_set(&vobj->voted_event_woken, 0);
+ init_waitqueue_head (&vobj->voted_event);
+ INIT_LIST_HEAD (&vobj->list);
+
+ /* which nodes we ask, and which have answered so far */
+ if (!votemap)
+ ocfs_node_map_init(osb, &vobj->req_vote_map);
+ else
+ ocfs_node_map_dup(osb, &vobj->req_vote_map, votemap);
+ ocfs_node_map_init(osb, &vobj->got_vote_map);
+
+ /* request bookkeeping */
+ vobj->seq_num = 0ULL;
+ vobj->req_lock_type = reqlock;
+ vobj->vote_status = 0;
+ vobj->pid = current->pid;
+
+ return vobj;
}
-static int ocfs2_do_request_vote(ocfs_super *osb,
- u64 blkno,
- unsigned int generation,
- enum ocfs2_vote_request type)
+#define OCFS_DLM_NET_TIMEOUT (30000) // 30 seconds
+
+/*
+ * ocfs_send_dlm_request_msg()
+ * inode is definitely non NULL
+ *
+ * Broadcasts an OCFS_VOTE_REQUEST for lock_id to the nodes in votemap
+ * and waits (up to OCFS_DLM_NET_TIMEOUT) for the replies to be folded
+ * into the vote object by the reply handler.  The request gets the next
+ * global sequence number, taken under comm_seq_lock.
+ *
+ * On return *vote_status holds the accumulated vote status and every
+ * node that answered has been cleared from votemap.  The return value is
+ * the status of the wait, or -ENOMEM if the vote object could not be
+ * allocated (in which case *vote_status is left untouched).
+ */
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, struct inode *inode, __u32 num_ident, int *vote_status)
{
- int status;
- unsigned int response_id;
- ocfs2_vote_msg *request = NULL;
- ocfs2_msg_hdr *hdr;
+ int status = 0;
+ ocfs_dlm_msg *dlm_msg = NULL;
+ ocfs_dlm_msg_hdr *req;
+ ocfs_vote_obj *obj;
+ __u32 msg_len, obj_len;
- OCFS_ASSERT(type == OCFS2_VOTE_REQ_DELETE ||
- type == OCFS2_VOTE_REQ_UNLINK ||
- type == OCFS2_VOTE_REQ_RENAME ||
- type == OCFS2_VOTE_REQ_UMOUNT ||
- type == OCFS2_VOTE_REQ_MOUNT);
+ LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
+ osb, lock_id, lock_type, flags);
- request = kmalloc(sizeof(*request), GFP_KERNEL);
- if (!request) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto bail;
+ msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
+ obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
+
+ obj = ocfs_alloc_vote_obj (osb, obj_len, lock_type, votemap);
+ if (obj == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto finally;
}
- memset(request, 0, sizeof(*request));
- hdr = &request->v_hdr;
+ dlm_msg = &(obj->m);
+ req = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
+ ocfs_init_dlm_msg (osb, dlm_msg, msg_len, OCFS_VOTE_REQUEST);
- response_id = ocfs2_new_response_id(osb);
+ /* allocate the next sequence number; the same value is stored in
+ * the message and in the vote object */
+ spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
+ req->lock_seq_num = ++OcfsGlobalCtxt.comm_seq_num;
+ obj->seq_num = req->lock_seq_num;
+ spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
- hdr->h_response_id = htonl(response_id);
- hdr->h_request = htonl(type);
- hdr->h_blkno = cpu_to_be64(blkno);
- hdr->h_generation = htonl(generation);
- hdr->h_node_num = htonl((unsigned int) osb->node_num);
+ req->lock_id = lock_id;
+ req->flags = flags;
+ req->num_ident = num_ident;
- status = ocfs2_broadcast_vote(osb, request, response_id);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail;
+#ifdef VERBOSE_LOCKING_TRACE
+ printk("ocfs_send_dlm_request_msg: inode=%p, lockid = %llu\n",
+ inode, lock_id);
+#endif
+
+ /* queue the object before sending so the reply handler can find it */
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_add_tail(&obj->list, &osb->vote_obj_queue);
+ spin_unlock(&osb->vote_obj_queue_lock);
+
+ ocfs_send_bcast (osb, votemap, dlm_msg);
+ spin_lock (&obj->lock);
+ obj->vote_state = VOTE_OBJ_STATE_SENT;
+ spin_unlock (&obj->lock);
+ status = ocfs_wait_uninterruptible(obj->voted_event,
+ atomic_read (&obj->voted_event_woken),
+ OCFS_DLM_NET_TIMEOUT);
+
+ spin_lock (&obj->lock);
+ if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
+ LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
+ } else if ((obj->vote_status != -EAGAIN && obj->vote_status != -EBUSY) || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
+#warning "should we even be erroring here at all!"
+ LOG_ERROR_ARGS("inode %llu, vote_status=%d, vote_state=%d, "
+ "lockid=%llu, flags = 0x%x, asked type = %u "
+ "master = %d, state = 0x%lx, type = %u\n",
+ OCFS_I(inode)->ip_blkno, obj->vote_status,
+ obj->vote_state, lock_id, flags, lock_type,
+ GET_INODE_LOCKRES(inode)->master_node_num,
+ GET_INODE_LOCKRES(inode)->readonly_state,
+ GET_INODE_LOCKRES(inode)->lock_type);
}
+ *vote_status = obj->vote_status;
+ obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
+ /* leave only the nodes that did not answer set in the caller's map */
+ ocfs_node_map_clear_bits(votemap, &obj->got_vote_map);
+ spin_unlock (&obj->lock);
-bail:
- if (request)
- kfree(request);
+ spin_lock(&osb->vote_obj_queue_lock);
+ list_del(&obj->list);
+ spin_unlock(&osb->vote_obj_queue_lock);
+
+ ocfs_compute_dlm_stats (status, *vote_status,
+ &(OcfsGlobalCtxt.net_reqst_stats));
+
+ ocfs_compute_dlm_stats (status, *vote_status,
+ &(osb->net_reqst_stats));
+finally:
+ /* the allocation-failure path arrives here with obj == NULL; do not
+ * hand a NULL object to ocfs_put_vote_obj() */
+ if (obj != NULL)
+ ocfs_put_vote_obj (obj);
+ LOG_EXIT_STATUS (status);
return status;
-}
+} /* ocfs_send_dlm_request_msg */
-static int ocfs2_request_vote(struct inode *inode,
- enum ocfs2_vote_request type)
+
+/*
+ * ocfs_process_one_vote_reply()
+ *
+ * Translates one node's reply code (ctxt->reply->status) into an errno
+ * style status written to *ctxt->status, and marks node_num in
+ * ctxt->got_vote_map for every reply code except FLAG_VOTE_FILE_DEL.
+ * A reply code that matches no case leaves the map untouched and stores
+ * 0.
+ *
+ * NOTE(review): *ctxt->status is overwritten on every call, so a later
+ * reply replaces the status recorded for an earlier node - confirm that
+ * this is intended.
+ */
+void ocfs_process_one_vote_reply(ocfs_super *osb, ocfs_vote_reply_ctxt *ctxt, __u32 node_num)
{
int status;
- ocfs_super *osb = OCFS2_SB(inode->i_sb);
+ int reply_status;
- if (ocfs_inode_is_new(inode))
- return 0;
+ reply_status = ctxt->reply->status;
- status = -EAGAIN;
- while (status == -EAGAIN) {
- if (signal_pending(current))
- return -EINTR;
+ status = 0;
- status = ocfs2_super_lock(osb, 0);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
+ switch (reply_status) {
+ case FLAG_VOTE_NODE:
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
break;
- }
-
- status = 0;
- if (!ocfs_node_map_is_only(osb, &osb->mounted_map,
- osb->node_num))
- status = ocfs2_do_request_vote(osb,
- OCFS_I(inode)->ip_blkno,
- inode->i_generation,
- type);
-
- ocfs2_super_unlock(osb, 0);
+ case FLAG_VOTE_OIN_ALREADY_INUSE:
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
+ /* -EBUSY for delete requests, -EINVAL for everything else */
+ status = -EINVAL;
+ if (ctxt->flags & FLAG_FILE_DELETE)
+ status = -EBUSY;
+ break;
+ case FLAG_VOTE_OIN_UPDATED:
+ status = 0;
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
+ break;
+ case FLAG_VOTE_UPDATE_RETRY:
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
+ status = -EAGAIN;
+ break;
+ case FLAG_VOTE_FILE_DEL:
+ /* the only handled reply that does not mark the node as voted */
+#warning "don't we need to set the node map bit here?"
+ status = -ENOENT;
+ break;
}
- return status;
+ *(ctxt->status) = status;
}
-int ocfs2_request_delete_vote(struct inode *inode)
+/* special case -1 for now
+ * TODO: should *really* make sure the calling func never passes -1!! */
+/*
+ * ocfs_node_map_init()
+ *
+ * Sets map->num_nodes from osb->max_nodes and zeroes the bitmap storage.
+ * The memset covers the storage for OCFS_NODE_MAP_MAX_NODES bits, not
+ * just the first num_nodes bits.
+ */
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map)
{
- return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_DELETE);
+ map->num_nodes = osb->max_nodes;
+ memset(map->map, 0, BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) *
+ sizeof(unsigned long));
}
-int ocfs2_request_unlink_vote(struct inode *inode)
+/*
+ * ocfs_node_map_set_bit()
+ *
+ * Sets "bit" in the map.  A bit value of -1 is silently ignored; any
+ * other value must be below map->num_nodes (asserted).
+ * NOTE(review): negative values other than -1 pass the assert - confirm
+ * callers never supply them.
+ */
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit)
{
- return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_UNLINK);
+ if (bit==-1)
+ return;
+ OCFS_ASSERT(bit < map->num_nodes);
+ set_bit(bit, map->map);
}
-int ocfs2_request_rename_vote(struct inode *inode)
+/*
+ * ocfs_node_map_clear_bit()
+ *
+ * Clears "bit" in the map.  A bit value of -1 is silently ignored; any
+ * other value must be below map->num_nodes (asserted).
+ * NOTE(review): negative values other than -1 pass the assert - confirm
+ * callers never supply them.
+ */
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit)
{
- return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_RENAME);
+ if (bit==-1)
+ return;
+ OCFS_ASSERT(bit < map->num_nodes);
+ clear_bit(bit, map->map);
}
-int ocfs2_request_mount_vote(ocfs_super *osb)
+// clear all the bits in "target" which are set in "mask"
+// (mask is scanned up to mask->num_nodes; each hit goes through
+// ocfs_node_map_clear_bit(), which asserts against target->num_nodes)
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask)
{
- int status;
-
- status = -EAGAIN;
- while (status == -EAGAIN) {
- if (signal_pending(current))
- return -EINTR;
-
- if (ocfs_node_map_is_only(osb, &osb->mounted_map,
- osb->node_num))
- return 0;
-
- status = ocfs2_do_request_vote(osb, 0ULL, 0,
- OCFS2_VOTE_REQ_MOUNT);
+ int bit, prev=0;
+ while (1) {
+ // next set bit in mask at or after prev
+ bit = find_next_bit (mask->map, mask->num_nodes, prev);
+ if (bit >= mask->num_nodes)
+ break;
+ ocfs_node_map_clear_bit(target, bit);
+ prev = bit+1;
}
- return status;
}
-int ocfs2_request_umount_vote(ocfs_super *osb)
+// set all the bits in "target" which are set in "mask"
+// (mask is scanned up to mask->num_nodes; each hit goes through
+// ocfs_node_map_set_bit(), which asserts against target->num_nodes)
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask)
{
- int status;
-
- status = -EAGAIN;
- while (status == -EAGAIN) {
- if (signal_pending(current))
- return -EINTR;
-
- if (ocfs_node_map_is_only(osb, &osb->mounted_map,
- osb->node_num))
- return 0;
-
- status = ocfs2_do_request_vote(osb, 0ULL, 0,
- OCFS2_VOTE_REQ_UMOUNT);
+ int bit, prev=0;
+ while (1) {
+ // next set bit in mask at or after prev
+ bit = find_next_bit (mask->map, mask->num_nodes, prev);
+ if (bit >= mask->num_nodes)
+ break;
+ ocfs_node_map_set_bit(target, bit);
+ prev = bit+1;
}
- return status;
}
-/* TODO: This should eventually be a hash table! */
-static ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(ocfs_super *osb,
- u32 response_id)
+/*
+ * ocfs_node_map_test_bit()
+ *
+ * Returns the result of test_bit() for "bit" in the map.  A bit at or
+ * beyond map->num_nodes is logged and triggers BUG().  Unlike the
+ * set/clear helpers, -1 is not special-cased here.
+ */
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit)
{
- struct list_head *p;
- ocfs2_net_wait_ctxt *w = NULL;
-
- list_for_each(p, &osb->net_response_list) {
- w = list_entry(p, ocfs2_net_wait_ctxt, n_list);
- if (response_id == w->n_response_id)
- break;
- w = NULL;
+ if (bit >= map->num_nodes) {
+ LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
+ BUG();
}
-
- return w;
+ return test_bit(bit, map->map);
}
-static int ocfs2_handle_response_message(net_msg *msg,
- u32 len,
- void *data)
+/*
+ * ocfs_node_map_stringify()
+ *
+ * Builds a string listing every set bit of the map as "%3d " and stores
+ * the kmalloc()ed buffer in *str; the caller frees it with kfree().
+ *
+ * The buffer holds four characters per node plus one byte for the
+ * terminating NUL that sprintf() writes, so a map with every bit set
+ * still fits.
+ *
+ * Returns 0 on success.  Returns -ENOMEM if the allocation fails or if
+ * an entry does not format to exactly four characters; in both cases
+ * *str is NULL on return.
+ */
+static int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
{
- unsigned int response_id, node_num;
- int response_status;
- ocfs_super *osb = data;
- ocfs2_response_msg *resp;
- ocfs2_net_wait_ctxt * w;
+ int i, n;
+ char *s;
+ size_t len;
- resp = (ocfs2_response_msg *) msg->buf;
+ OCFS_ASSERT(map->num_nodes > 0);
- response_id = ntohl(resp->r_hdr.h_response_id);
- node_num = ntohl(resp->r_hdr.h_node_num);
- response_status = ntohl(resp->r_response);
+ /* +1: room for the NUL written after the last entry */
+ len = strlen("123 ") * map->num_nodes + 1;
+ *str = kmalloc(len, GFP_KERNEL);
+ if (!(*str))
+ return -ENOMEM;
- LOG_TRACE_STR("recieved response message:");
- LOG_TRACE_ARGS("h_response_id = %u\n", response_id);
- LOG_TRACE_ARGS("h_request = %u\n", ntohl(resp->r_hdr.h_request));
- LOG_TRACE_ARGS("h_blkno = %llu\n", be64_to_cpu(resp->r_hdr.h_blkno));
- LOG_TRACE_ARGS("h_generation = %u\n", ntohl(resp->r_hdr.h_generation));
- LOG_TRACE_ARGS("h_node_num = %u\n", node_num);
- LOG_TRACE_ARGS("r_response = %d\n", response_status);
+ memset(*str, 0, len);
- spin_lock(&osb->net_response_lock);
- w = __ocfs2_find_net_wait_ctxt(osb, response_id);
- if (!w) {
- LOG_TRACE_ARGS("request not found!\n");
- goto bail;
+ s = *str;
+ for (i=0; i<map->num_nodes; i++) {
+ if (ocfs_node_map_test_bit(map, i)) {
+ n = sprintf(s, "%3d ", i);
+ if (n != strlen("123 ")) {
+ kfree(*str);
+ /* do not leave the caller with a dangling pointer */
+ *str = NULL;
+ return -ENOMEM;
+ }
+ s += n;
+ }
}
+ return 0;
+}
- if (response_status && (!w->n_response)) {
- /* we only really need one negative response so don't
- * set it twice. */
- w->n_response = response_status;
- }
+/*
+ * ocfs_node_map_is_empty()
+ *
+ * Returns 1 when no bit below map->num_nodes is set, 0 otherwise.
+ * map->num_nodes must be positive (asserted).
+ */
+int ocfs_node_map_is_empty(ocfs_node_map *map)
+{
+ int first_set;
+
+ OCFS_ASSERT(map->num_nodes > 0);
+
+ /* find_next_bit() yields a value >= num_nodes when nothing is set */
+ first_set = find_next_bit(map->map, map->num_nodes, 0);
+ return (first_set >= map->num_nodes) ? 1 : 0;
+}
- ocfs_node_map_clear_bit(osb, &w->n_node_map, node_num);
- if (ocfs_node_map_is_empty(osb, &w->n_node_map))
- wake_up(&w->n_event);
-bail:
- spin_unlock(&osb->net_response_lock);
+/*
+ * ocfs_node_map_is_equal()
+ *
+ * Returns 1 if both maps hold the same bits, 0 otherwise.  Both maps
+ * must have the same, positive num_nodes (asserted).  The comparison is
+ * done one whole long at a time over BITS_TO_LONGS(num_nodes) words, so
+ * it also compares any bits of the last word beyond num_nodes.
+ */
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
+{
+ int num_longs, i;
- return 0;
+ OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
+ OCFS_ASSERT(map1->num_nodes > 0);
+
+ num_longs = BITS_TO_LONGS(map1->num_nodes);
+ for (i=0; i<num_longs; i++) {
+ if (map1->map[i] != map2->map[i])
+ return 0;
+ }
+ return 1;
}
-static int ocfs2_handle_vote_message(net_msg *msg,
- u32 len,
- void *data)
+
+/*
+ * ocfs_node_map_and()
+ *
+ * In-place bitwise AND: target &= mask, one long at a time over
+ * BITS_TO_LONGS(num_nodes) words.  Both maps must have the same,
+ * positive num_nodes (asserted).
+ */
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
{
- int status;
- ocfs_super *osb = data;
- ocfs2_vote_work *work;
+ int num_longs, i;
- work = kmalloc(sizeof(ocfs2_vote_work), GFP_KERNEL);
- if (!work) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+ OCFS_ASSERT(target->num_nodes == mask->num_nodes);
+ OCFS_ASSERT(target->num_nodes > 0);
+
+ num_longs = BITS_TO_LONGS(target->num_nodes);
+ for (i=0; i<num_longs; i++)
+ target->map[i] &= mask->map[i];
+}
- INIT_LIST_HEAD(&work->w_list);
- memcpy(&work->w_msg, msg->buf, sizeof(ocfs2_vote_msg));
+/*
+ * ocfs_node_map_set()
+ *
+ * Copies the bitmap words of "from" into "target", one long at a time
+ * over BITS_TO_LONGS(num_nodes) words.  Both maps must already have the
+ * same, positive num_nodes (asserted); num_nodes itself is not copied.
+ */
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from)
+{
+ int num_longs, i;
- LOG_TRACE_STR("scheduling vote request:\n");
- LOG_TRACE_ARGS("h_response_id = %u\n",
- work->w_msg.v_hdr.h_response_id);
- LOG_TRACE_ARGS("h_request = %u\n", work->w_msg.v_hdr.h_request);
- LOG_TRACE_ARGS("h_blkno = %llu\n", work->w_msg.v_hdr.h_blkno);
- LOG_TRACE_ARGS("h_generation = %u\n", work->w_msg.v_hdr.h_generation);
- LOG_TRACE_ARGS("h_node_num = %u\n", work->w_msg.v_hdr.h_node_num);
+ OCFS_ASSERT(target->num_nodes == from->num_nodes);
+ OCFS_ASSERT(target->num_nodes > 0);
- spin_lock(&osb->vote_task_lock);
- list_add_tail(&work->w_list, &osb->vote_list);
- osb->vote_count++;
- spin_unlock(&osb->vote_task_lock);
+ num_longs = BITS_TO_LONGS(target->num_nodes);
+ for (i=0; i<num_longs; i++)
+ target->map[i] = from->map[i];
+}
- ocfs2_kick_vote_thread(osb);
- status = 0;
-bail:
- return status;
+/*
+ * ocfs_node_map_dup()
+ *
+ * Initialises "target" for this osb (num_nodes = osb->max_nodes, all
+ * bits clear) and then copies the bits of "from" into it.  Because
+ * ocfs_node_map_set() asserts equal sizes, from->num_nodes must equal
+ * osb->max_nodes.
+ */
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from)
+{
+ OCFS_ASSERT(from->num_nodes > 0);
+ ocfs_node_map_init(osb, target);
+ ocfs_node_map_set(target, from);
}
-int ocfs2_register_net_handlers(ocfs_super *osb)
+/* returns 1 if bit is the only bit set in target, 0 otherwise
+ *
+ * Works on a private copy: the copy has "bit" cleared and is then tested
+ * for emptiness, so target itself is not modified.  Note that a target
+ * with no bits set at all also yields 1. */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit)
{
- int status;
- int i = MAX_VOL_ID_LENGTH - sizeof(osb->net_key);
+ ocfs_node_map temp;
+ int ret;
- memcpy(&osb->net_key, &osb->uuid[i], sizeof(osb->net_key));
- osb->net_response_ids = 0;
- spin_lock_init(&osb->net_response_lock);
- INIT_LIST_HEAD(&osb->net_response_list);
+ ocfs_node_map_dup(osb, &temp, target);
+ ocfs_node_map_clear_bit(&temp, bit);
+ ret = ocfs_node_map_is_empty(&temp);
+ return ret;
+}
- status = net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
- osb->net_key,
- 0,
- sizeof(ocfs2_response_msg),
- ocfs2_handle_response_message,
- osb);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+/*
+** All structures have a type, and a size associated with it.
+** The type serves to identify the structure. The size is used for
+** consistency checking ...
+*/
+/* ocfs_publish_map_set(): sets bit "num" in pubmap via
+ * ocfs_node_map_set_bit(); no locking is done here. */
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num)
+{
+ ocfs_node_map_set_bit(pubmap, num);
+}
- status = net_register_handler(OCFS2_MESSAGE_TYPE_VOTE,
- osb->net_key,
- 0,
- sizeof(ocfs2_vote_msg),
- ocfs2_handle_vote_message,
- osb);
- if (status < 0) {
- /* TODO: net_unregister here! */
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+/* ocfs_publish_map_clear(): clears bit "num" in pubmap via
+ * ocfs_node_map_clear_bit(); no locking is done here. */
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num)
+{
+ ocfs_node_map_clear_bit(pubmap, num);
+}
-bail:
- if (status < 0)
- osb->net_key = 0;
+/* update the recovery map here:
+ * sets bit "num" in osb->recovery_map under osb->recovery_map_lock */
+void ocfs_recovery_map_set(ocfs_super *osb, int num)
+{
+ spin_lock(&osb->recovery_map_lock);
+ ocfs_node_map_set_bit(&osb->recovery_map, num);
+ spin_unlock(&osb->recovery_map_lock);
+}
- return status;
+/* ocfs_recovery_map_clear(): clears bit "num" in osb->recovery_map
+ * under osb->recovery_map_lock */
+void ocfs_recovery_map_clear(ocfs_super *osb, int num)
+{
+ spin_lock(&osb->recovery_map_lock);
+ ocfs_node_map_clear_bit(&osb->recovery_map, num);
+ spin_unlock(&osb->recovery_map_lock);
}
-void ocfs2_unregister_net_handlers(ocfs_super *osb)
+/* ocfs_node_is_recovering(): returns 0 for num == -1, otherwise the
+ * state of bit "num" in osb->recovery_map.  The map is read without
+ * taking osb->recovery_map_lock. */
+int ocfs_node_is_recovering(ocfs_super *osb, int num)
+{
+ if (num == -1)
+ return 0;
+ return ocfs_node_map_test_bit(&osb->recovery_map, num);
+}
+
+/* ocfs_node_is_alive(): returns 0 for index == -1, otherwise the state
+ * of bit "index" in pubmap. */
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index)
{
- if (!osb->net_key)
- return;
+ if (index == -1)
+ return 0;
+ return ocfs_node_map_test_bit(pubmap, index);
+}
- /* TODO: net_unregister here! */
- /* TODO: net_unregister here! */
-
- if (!list_empty(&osb->net_response_list))
- LOG_ERROR_STR("net response list not empty!\n");
-}
Modified: branches/endian/src/vote.h
===================================================================
--- branches/endian/src/vote.h 2005-01-18 21:39:45 UTC (rev 1788)
+++ branches/endian/src/vote.h 2005-01-18 21:41:46 UTC (rev 1789)
@@ -3,7 +3,7 @@
*
* vote.h
*
- * description here
+ * Function prototypes
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
@@ -23,23 +23,53 @@
* Boston, MA 021110-1307, USA.
*/
+#ifndef OCFS2_VOTE_H
+#define OCFS2_VOTE_H
-#ifndef VOTE_H
-#define VOTE_H
+/* vote messaging: sockets, request/reply handling, vote object lookup */
+int ocfs_init_udp_sock(struct socket **send_sock,
+ struct socket **recv_sock);
+int ocfs_lookup_obj_for_proc(ocfs_vote_obj *obj,
+ ocfs_vote_obj_lookup_data *data);
+int ocfs_lookup_obj_by_lockid(ocfs_vote_obj *obj,
+ ocfs_vote_obj_lookup_data *data);
+int ocfs_lookup_vote_request_obj(ocfs_super *osb,
+ ocfs_vote_obj_lookup_data *data);
+void ocfs_process_one_vote_reply(ocfs_super *osb,
+ ocfs_vote_reply_ctxt *ctxt,
+ __u32 node_num);
+int ocfs_recv_udp_msg(ocfs_recv_ctxt *recv_ctxt);
+int ocfs_send_dismount_msg(ocfs_super *osb);
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id,
+ __u32 lock_type, __u32 flags,
+ ocfs_node_map *votemap,
+ struct inode *inode, __u32 num_ident,
+ int *vote_status);
+int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
+ __u32 vote_status);
+/* NOTE(review): repeats the ocfs_lookup_vote_request_obj() declaration
+ * made earlier in this header; redundant but harmless */
+int ocfs_lookup_vote_request_obj (ocfs_super *osb,
+ ocfs_vote_obj_lookup_data *data);
-int ocfs2_vote_thread(void *arg);
-static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
-{
- atomic_set(&osb->wake_vote_task, 1);
- wake_up(&osb->vote_event);
-}
+/* ocfs_node_map bitmap helpers */
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit);
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit);
+// clear all the bits in "target" which are set in "mask"
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask);
+// set all the bits in "target" which are set in "mask"
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask);
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit);
+int ocfs_node_map_is_empty(ocfs_node_map *map);
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2);
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask);
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from);
+/* returns 1 if bit is the only bit set in target, 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit);
-int ocfs2_request_delete_vote(struct inode *inode);
-int ocfs2_request_unlink_vote(struct inode *inode);
-int ocfs2_request_rename_vote(struct inode *inode);
-int ocfs2_request_mount_vote(ocfs_super *osb);
-int ocfs2_request_umount_vote(ocfs_super *osb);
-int ocfs2_register_net_handlers(ocfs_super *osb);
-void ocfs2_unregister_net_handlers(ocfs_super *osb);
+/* publish-map and recovery-map wrappers around the node map helpers */
+int ocfs_node_is_recovering(ocfs_super *osb, int num);
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index);
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num);
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num);
+void ocfs_recovery_map_set(ocfs_super *osb, int num);
+void ocfs_recovery_map_clear(ocfs_super *osb, int num);
-#endif
+#endif /* OCFS2_VOTE_H */
More information about the Ocfs2-commits
mailing list