[Ocfs2-devel] [PATCH] ocfs2: Add statistics for the checksum and ecc operations.
Sunil Mushran
sunil.mushran at oracle.com
Tue Jan 6 15:43:48 PST 2009
Why not one "statistics" debugfs file where we can keep adding all the
stats?
Joel Becker wrote:
> It would be nice to know how often we get checksum failures. Even
> better, how many of them we can fix with the single bit ecc. So, we add
> a statistics structure. The structure can be installed into debugfs
> wherever the user wants.
>
> For ocfs2, we'll put it in the superblock-specific debugfs directory and
> pass it down from our higher-level functions. The stats are only
> registered with debugfs when the filesystem supports metadata ecc.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
> fs/ocfs2/blockcheck.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++--
> fs/ocfs2/blockcheck.h | 29 +++++++-
> fs/ocfs2/ocfs2.h | 4 +
> fs/ocfs2/super.c | 42 +++++++++---
> 4 files changed, 240 insertions(+), 19 deletions(-)
>
> diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
> index 2a947c4..a1163b8 100644
> --- a/fs/ocfs2/blockcheck.c
> +++ b/fs/ocfs2/blockcheck.c
> @@ -22,6 +22,9 @@
> #include <linux/crc32.h>
> #include <linux/buffer_head.h>
> #include <linux/bitops.h>
> +#include <linux/debugfs.h>
> +#include <linux/module.h>
> +#include <linux/fs.h>
> #include <asm/byteorder.h>
>
> #include <cluster/masklog.h>
> @@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
> ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
> }
>
> +
> +/*
> + * Debugfs handling.
> + */
> +
> +#ifdef CONFIG_DEBUG_FS
> +
> +static int blockcheck_u64_get(void *data, u64 *val)
> +{
> + *val = *(u64 *)data;
> + return 0;
> +}
> +DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
> +
> +static struct dentry *blockcheck_debugfs_create(const char *name,
> + struct dentry *parent,
> + u64 *value)
> +{
> + return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
> + &blockcheck_fops);
> +}
> +
> +static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
> +{
> + if (stats) {
> + debugfs_remove(stats->b_debug_check);
> + stats->b_debug_check = NULL;
> + debugfs_remove(stats->b_debug_failure);
> + stats->b_debug_failure = NULL;
> + debugfs_remove(stats->b_debug_recover);
> + stats->b_debug_recover = NULL;
> + debugfs_remove(stats->b_debug_dir);
> + stats->b_debug_dir = NULL;
> + }
> +}
> +
> +static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
> + struct dentry *parent)
> +{
> + int rc = -EINVAL;
> +
> + if (!stats)
> + goto out;
> +
> + stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
> + if (!stats->b_debug_dir)
> + goto out;
> +
> + stats->b_debug_check =
> + blockcheck_debugfs_create("blocks_checked",
> + stats->b_debug_dir,
> + &stats->b_check_count);
> +
> + stats->b_debug_failure =
> + blockcheck_debugfs_create("checksums_failed",
> + stats->b_debug_dir,
> + &stats->b_failure_count);
> +
> + stats->b_debug_recover =
> + blockcheck_debugfs_create("ecc_recoveries",
> + stats->b_debug_dir,
> + &stats->b_recover_count);
> + if (stats->b_debug_check && stats->b_debug_failure &&
> + stats->b_debug_recover)
> + rc = 0;
> +
> +out:
> + if (rc)
> + ocfs2_blockcheck_debug_remove(stats);
> + return rc;
> +}
> +#else
> +static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
> + struct dentry *parent)
> +{
> + return 0;
> +}
> +
> +static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
> +{
> +}
> +#endif /* CONFIG_DEBUG_FS */
> +
> +/* Always-called wrappers for starting and stopping the debugfs files */
> +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
> + struct dentry *parent)
> +{
> + return ocfs2_blockcheck_debug_install(stats, parent);
> +}
> +
> +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
> +{
> + ocfs2_blockcheck_debug_remove(stats);
> +}
> +
> +static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats)
> +{
> + u64 new_count;
> +
> + if (!stats)
> + return;
> +
> + spin_lock(&stats->b_lock);
> + stats->b_check_count++;
> + new_count = stats->b_check_count;
> + spin_unlock(&stats->b_lock);
> +
> + if (!new_count)
> + mlog(ML_NOTICE, "Block check count has wrapped\n");
> +}
> +
> +static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats)
> +{
> + u64 new_count;
> +
> + if (!stats)
> + return;
> +
> + spin_lock(&stats->b_lock);
> + stats->b_failure_count++;
> + new_count = stats->b_failure_count;
> + spin_unlock(&stats->b_lock);
> +
> + if (!new_count)
> + mlog(ML_NOTICE, "Checksum failure count has wrapped\n");
> +}
> +
> +static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats)
> +{
> + u64 new_count;
> +
> + if (!stats)
> + return;
> +
> + spin_lock(&stats->b_lock);
> + stats->b_recover_count++;
> + new_count = stats->b_recover_count;
> + spin_unlock(&stats->b_lock);
> +
> + if (!new_count)
> + mlog(ML_NOTICE, "ECC recovery count has wrapped\n");
> +}
> +
> +
> +
> +/*
> + * These are the low-level APIs for using the ocfs2_block_check structure.
> + */
> +
> /*
> * This function generates check information for a block.
> * data is the block to be checked. bc is a pointer to the
> @@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,
> * Again, the data passed in should be the on-disk endian.
> */
> int ocfs2_block_check_validate(void *data, size_t blocksize,
> - struct ocfs2_block_check *bc)
> + struct ocfs2_block_check *bc,
> + struct ocfs2_blockcheck_stats *stats)
> {
> int rc = 0;
> struct ocfs2_block_check check;
> u32 crc, ecc;
>
> + ocfs2_blockcheck_inc_check(stats);
> +
> check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
> check.bc_ecc = le16_to_cpu(bc->bc_ecc);
>
> @@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
> if (crc == check.bc_crc32e)
> goto out;
>
> + ocfs2_blockcheck_inc_failure(stats);
> mlog(ML_ERROR,
> "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
> (unsigned int)check.bc_crc32e, (unsigned int)crc);
> @@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
>
> /* And check the crc32 again */
> crc = crc32_le(~0, data, blocksize);
> - if (crc == check.bc_crc32e)
> + if (crc == check.bc_crc32e) {
> + ocfs2_blockcheck_inc_recover(stats);
> goto out;
> + }
>
> mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
> (unsigned int)check.bc_crc32e, (unsigned int)crc);
> @@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
> * Again, the data passed in should be the on-disk endian.
> */
> int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
> - struct ocfs2_block_check *bc)
> + struct ocfs2_block_check *bc,
> + struct ocfs2_blockcheck_stats *stats)
> {
> int i, rc = 0;
> struct ocfs2_block_check check;
> @@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
> if (!nr)
> return 0;
>
> + ocfs2_blockcheck_inc_check(stats);
> +
> check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
> check.bc_ecc = le16_to_cpu(bc->bc_ecc);
>
> @@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
> if (crc == check.bc_crc32e)
> goto out;
>
> + ocfs2_blockcheck_inc_failure(stats);
> mlog(ML_ERROR,
> "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
> (unsigned int)check.bc_crc32e, (unsigned int)crc);
> @@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
> /* And check the crc32 again */
> for (i = 0, crc = ~0; i < nr; i++)
> crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
> - if (crc == check.bc_crc32e)
> + if (crc == check.bc_crc32e) {
> + ocfs2_blockcheck_inc_recover(stats);
> goto out;
> + }
>
> mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
> (unsigned int)check.bc_crc32e, (unsigned int)crc);
> @@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
> struct ocfs2_block_check *bc)
> {
> int rc = 0;
> + struct ocfs2_super *osb = OCFS2_SB(sb);
>
> - if (ocfs2_meta_ecc(OCFS2_SB(sb)))
> - rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
> + if (ocfs2_meta_ecc(osb))
> + rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc,
> + &osb->osb_ecc_stats);
>
> return rc;
> }
> @@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
> struct ocfs2_block_check *bc)
> {
> int rc = 0;
> + struct ocfs2_super *osb = OCFS2_SB(sb);
>
> - if (ocfs2_meta_ecc(OCFS2_SB(sb)))
> - rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
> + if (ocfs2_meta_ecc(osb))
> + rc = ocfs2_block_check_validate_bhs(bhs, nr, bc,
> + &osb->osb_ecc_stats);
>
> return rc;
> }
> diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
> index 70ec3fe..d4b69fe 100644
> --- a/fs/ocfs2/blockcheck.h
> +++ b/fs/ocfs2/blockcheck.h
> @@ -21,6 +21,24 @@
> #define OCFS2_BLOCKCHECK_H
>
>
> +/* Count errors and error correction from blockcheck.c */
> +struct ocfs2_blockcheck_stats {
> + spinlock_t b_lock;
> + u64 b_check_count; /* Number of blocks we've checked */
> + u64 b_failure_count; /* Number of failed checksums */
> + u64 b_recover_count; /* Number of blocks fixed by ecc */
> +
> + /*
> + * debugfs entries, used if this is passed to
> + * ocfs2_blockcheck_stats_debugfs_install()
> + */
> + struct dentry *b_debug_dir; /* Parent of the debugfs files */
> + struct dentry *b_debug_check; /* Exposes b_check_count */
> + struct dentry *b_debug_failure; /* Exposes b_failure_count */
> + struct dentry *b_debug_recover; /* Exposes b_recover_count */
> +};
> +
> +
> /* High level block API */
> void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
> struct ocfs2_block_check *bc);
> @@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
> void ocfs2_block_check_compute(void *data, size_t blocksize,
> struct ocfs2_block_check *bc);
> int ocfs2_block_check_validate(void *data, size_t blocksize,
> - struct ocfs2_block_check *bc);
> + struct ocfs2_block_check *bc,
> + struct ocfs2_blockcheck_stats *stats);
> void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
> struct ocfs2_block_check *bc);
> int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
> - struct ocfs2_block_check *bc);
> + struct ocfs2_block_check *bc,
> + struct ocfs2_blockcheck_stats *stats);
> +
> +/* Debug Initialization */
> +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
> + struct dentry *parent);
> +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);
>
> /*
> * Hamming code functions
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index ad5c24a..ba37433 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -47,6 +47,9 @@
> #include "ocfs2_fs.h"
> #include "ocfs2_lockid.h"
>
> +/* For struct ocfs2_blockcheck_stats */
> +#include "blockcheck.h"
> +
> /* Most user visible OCFS2 inodes will have very few pieces of
> * metadata, but larger files (including bitmaps, etc) must be taken
> * into account when designing an access scheme. We allow a small
> @@ -297,6 +300,7 @@ struct ocfs2_super
> struct ocfs2_dinode *local_alloc_copy;
> struct ocfs2_quota_recovery *quota_rec;
>
> + struct ocfs2_blockcheck_stats osb_ecc_stats;
> struct ocfs2_alloc_stats alloc_stats;
> char dev_str[20]; /* "major,minor" of the device */
>
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index 43ed113..518b08a 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -118,10 +118,12 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
> static int ocfs2_check_volume(struct ocfs2_super *osb);
> static int ocfs2_verify_volume(struct ocfs2_dinode *di,
> struct buffer_head *bh,
> - u32 sectsize);
> + u32 sectsize,
> + struct ocfs2_blockcheck_stats *stats);
> static int ocfs2_initialize_super(struct super_block *sb,
> struct buffer_head *bh,
> - int sector_size);
> + int sector_size,
> + struct ocfs2_blockcheck_stats *stats);
> static int ocfs2_get_sector(struct super_block *sb,
> struct buffer_head **bh,
> int block,
> @@ -539,7 +541,8 @@ out:
>
> static int ocfs2_sb_probe(struct super_block *sb,
> struct buffer_head **bh,
> - int *sector_size)
> + int *sector_size,
> + struct ocfs2_blockcheck_stats *stats)
> {
> int status, tmpstat;
> struct ocfs1_vol_disk_hdr *hdr;
> @@ -605,7 +608,8 @@ static int ocfs2_sb_probe(struct super_block *sb,
> goto bail;
> }
> di = (struct ocfs2_dinode *) (*bh)->b_data;
> - status = ocfs2_verify_volume(di, *bh, blksize);
> + memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
> + status = ocfs2_verify_volume(di, *bh, blksize, stats);
> if (status >= 0)
> goto bail;
> brelse(*bh);
> @@ -811,6 +815,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
> struct ocfs2_super *osb = NULL;
> struct buffer_head *bh = NULL;
> char nodestr[8];
> + struct ocfs2_blockcheck_stats stats;
>
> mlog_entry("%p, %p, %i", sb, data, silent);
>
> @@ -820,13 +825,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
> }
>
> /* probe for superblock */
> - status = ocfs2_sb_probe(sb, &bh, §or_size);
> + status = ocfs2_sb_probe(sb, &bh, §or_size, &stats);
> if (status < 0) {
> mlog(ML_ERROR, "superblock probe failed!\n");
> goto read_super_error;
> }
>
> - status = ocfs2_initialize_super(sb, bh, sector_size);
> + status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
> osb = OCFS2_SB(sb);
> if (status < 0) {
> mlog_errno(status);
> @@ -926,6 +931,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
> goto read_super_error;
> }
>
> + if (ocfs2_meta_ecc(osb)) {
> + status = ocfs2_blockcheck_stats_debugfs_install(
> + &osb->osb_ecc_stats,
> + osb->osb_debug_root);
> + if (status) {
> + mlog(ML_ERROR,
> + "Unable to create blockcheck statistics "
> + "files\n");
> + goto read_super_error;
> + }
> + }
> +
> status = ocfs2_mount_volume(sb);
> if (osb->root_inode)
> inode = igrab(osb->root_inode);
> @@ -1656,6 +1673,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
> if (osb->cconn)
> ocfs2_dlm_shutdown(osb, hangup_needed);
>
> + ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
> debugfs_remove(osb->osb_debug_root);
>
> if (hangup_needed)
> @@ -1703,7 +1721,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
>
> static int ocfs2_initialize_super(struct super_block *sb,
> struct buffer_head *bh,
> - int sector_size)
> + int sector_size,
> + struct ocfs2_blockcheck_stats *stats)
> {
> int status;
> int i, cbits, bbits;
> @@ -1755,6 +1774,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
> atomic_set(&osb->alloc_stats.bg_allocs, 0);
> atomic_set(&osb->alloc_stats.bg_extends, 0);
>
> + /* Copy the blockcheck stats from the superblock probe */
> + osb->osb_ecc_stats = *stats;
> +
> ocfs2_init_node_maps(osb);
>
> snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
> @@ -1982,7 +2004,8 @@ bail:
> */
> static int ocfs2_verify_volume(struct ocfs2_dinode *di,
> struct buffer_head *bh,
> - u32 blksz)
> + u32 blksz,
> + struct ocfs2_blockcheck_stats *stats)
> {
> int status = -EAGAIN;
>
> @@ -1995,7 +2018,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
> OCFS2_FEATURE_INCOMPAT_META_ECC) {
> status = ocfs2_block_check_validate(bh->b_data,
> bh->b_size,
> - &di->i_check);
> + &di->i_check,
> + stats);
> if (status)
> goto out;
> }
>
More information about the Ocfs2-devel
mailing list