[OracleOSS] [TitleIndex] [WordIndex]

OCFS2/DesignDocs/BlockErrorDetection/ocfs2-tools-ecc-patch

ocfs2-tools-ecc.patch

Index: Config.make.in
===================================================================
--- Config.make.in      (revision 1204)
+++ Config.make.in      (revision 1283)
@@ -51,6 +51,7 @@ LDFLAGS = @LDFLAGS@ 
 COM_ERR_CFLAGS = @COM_ERR_CFLAGS@
 COM_ERR_LIBS = @COM_ERR_LIBS@
 UUID_LIBS = @UUID_LIBS@
+ZLIB_LIBS = @ZLIB_LIBS@
 
 GLIB_CFLAGS = @GLIB_CFLAGS@
 GLIB_LIBS = @GLIB_LIBS@
Index: libocfs2/newdir.c
===================================================================
--- libocfs2/newdir.c   (revision 1204)
+++ libocfs2/newdir.c   (revision 1283)
@@ -43,6 +43,7 @@ errcode_t ocfs2_new_dir_block(ocfs2_file
                              uint64_t parent_ino, char **block)
 {
        struct ocfs2_dir_entry  *dir;
+       struct ocfs2_dir_check_entry *dc;
        errcode_t               ret;
        char                    *buf;
 
@@ -55,6 +56,14 @@ errcode_t ocfs2_new_dir_block(ocfs2_file
        dir = (struct ocfs2_dir_entry *) buf;
        dir->rec_len = fs->fs_blocksize;
 
+       if (OCFS2_HAS_INCOMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
+                                      OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK)) {
+               dc = (struct ocfs2_dir_check_entry *)dir;
+               memset(dc, 0, sizeof(struct ocfs2_dir_check_entry));
+               dc->rec_len = OCFS2_DIR_REC_LEN(sizeof(struct ocfs2_block_check));
+               dir = (struct ocfs2_dir_entry *) (((char *)dc) + dc->rec_len);
+       }
+
        if (dir_ino) {
                /*
                 * Set up entry for '.'
@@ -68,9 +77,10 @@ errcode_t ocfs2_new_dir_block(ocfs2_file
                /*
                 * Set up entry for '..'
                 */
-               dir = (struct ocfs2_dir_entry *) (buf + dir->rec_len);
+               dir = (struct ocfs2_dir_entry *) (((char *)dir) + dir->rec_len);
                dir->inode = parent_ino;
-               dir->rec_len = fs->fs_blocksize - OCFS2_DIR_REC_LEN(1);
+               /* '..' takes the rest of the block after its own offset */
+               dir->rec_len = fs->fs_blocksize - ((char *)dir - buf);
                dir->name_len = 2;
                dir->file_type = OCFS2_FT_DIR;
                dir->name[0] = '.';
Index: libocfs2/Makefile
===================================================================
--- libocfs2/Makefile   (revision 1204)
+++ libocfs2/Makefile   (revision 1283)
@@ -46,6 +46,7 @@ CFILES =              \
        alloc.c         \
        bitmap.c        \
        bitops.c        \
+       blockcheck.c    \
        cached_inode.c  \
        chain.c         \
        chainalloc.c    \
@@ -85,6 +86,7 @@ CFILES =              \
 HFILES =                               \
        include/bitmap.h                \
        include/bitops.h                \
+       include/blockcheck.h            \
        include/byteorder.h             \
        include/dir_iterate.h           \
        include/dir_util.h              \
Index: libocfs2/inode.c
===================================================================
--- libocfs2/inode.c    (revision 1204)
+++ libocfs2/inode.c    (revision 1283)
@@ -32,6 +32,7 @@
 #include <inttypes.h>
 
 #include "ocfs2.h"
+#include "blockcheck.h"
 
 
 errcode_t ocfs2_check_directory(ocfs2_filesys *fs, uint64_t dir)
@@ -234,6 +235,11 @@ errcode_t ocfs2_read_inode(ocfs2_filesys
                   strlen(OCFS2_INODE_SIGNATURE)))
                goto out;
 
+       /* XXX: Someone tell me if I've got the endianness of this right */
+       ret = ocfs2_block_check_validate(fs, di, &di->i_check);
+       if (ret)
+               goto out;  /* XXX: Should we change EIO to a specific et? */
+
        memcpy(inode_buf, blk, fs->fs_blocksize);
 
        di = (struct ocfs2_dinode *) inode_buf;
@@ -269,6 +275,7 @@ errcode_t ocfs2_write_inode(ocfs2_filesy
        di = (struct ocfs2_dinode *)blk;
        ocfs2_swap_inode_from_cpu(di);
 
+       ocfs2_block_check_compute(fs, di, &di->i_check);
        ret = io_write_block(fs->fs_io, blkno, 1, blk);
        if (ret)
                goto out;
Index: libocfs2/include/ocfs2.h
===================================================================
--- libocfs2/include/ocfs2.h    (revision 1204)
+++ libocfs2/include/ocfs2.h    (revision 1283)
@@ -48,6 +48,7 @@
 
 #include "byteorder.h"
 
+#define OCFS2_SB(sb) (sb)
 
 #if OCFS2_FLAT_INCLUDES
 #include "o2dlm.h"
Index: libocfs2/include/ocfs2_fs.h
===================================================================
--- libocfs2/include/ocfs2_fs.h (revision 1204)
+++ libocfs2/include/ocfs2_fs.h (revision 1283)
@@ -85,9 +85,8 @@
 #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask)                  \
        OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
 
-#define OCFS2_FEATURE_COMPAT_SUPP      0
-#define OCFS2_FEATURE_INCOMPAT_SUPP    0
-#define OCFS2_FEATURE_RO_COMPAT_SUPP   0
+/* 802.3 crc32 + ECC */
+#define OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK     0x0001
 
 /*
  * Heartbeat-only devices are missing journals and other files.  The
@@ -97,6 +96,11 @@
 #define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV   0x0002
 
 
+#define OCFS2_FEATURE_COMPAT_SUPP      0
+#define OCFS2_FEATURE_INCOMPAT_SUPP    (OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK)
+#define OCFS2_FEATURE_RO_COMPAT_SUPP   0
+
+
 /*
  * Flags on ocfs2_dinode.i_flags
  */
@@ -234,6 +238,21 @@ static unsigned char ocfs2_type_by_mode[
 #define OCFS2_RAW_SB(dinode)           (&((dinode)->id2.i_super))
 
 /*
+ * Metadata block check structure.  If OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK
+ * is not set, it is all zeros.
+ */
+struct ocfs2_block_check {
+/*00*/ __le32 bc_crc32e;       /* 802.3 Ethernet II CRC32 */
+       __le16 bc_ecc;          /* Single-error-correction parity vector.
+                                  This is a simple Hamming code dependent
+                                  on the blocksize.  OCFS2's maximum 
+                                  blocksize, 4K, requires 16 parity bits,
+                                  so we fit in __le16. */
+       __le16 bc_reserved1;
+/*08*/
+};
+
+/*
  * On disk extent record for OCFS2
  * It describes a range of clusters on disk.
  */
@@ -245,14 +264,16 @@ struct ocfs2_extent_rec {
 };
 
 struct ocfs2_chain_rec {
-       __le32 c_free;  /* Number of free bits in this chain. */
+/*00*/ __le32 c_free;  /* Number of free bits in this chain. */
        __le32 c_total; /* Number of total bits in this chain */
        __le64 c_blkno; /* Physical disk offset (blocks) of 1st group */
+/*10*/
 };
 
 struct ocfs2_truncate_rec {
-       __le32 t_start;         /* 1st cluster in this log */
+/*00*/ __le32 t_start;         /* 1st cluster in this log */
        __le32 t_clusters;      /* Number of total clusters covered */
+/*08*/
 };
 
 /*
@@ -306,14 +327,14 @@ struct ocfs2_truncate_log {
 struct ocfs2_extent_block
 {
 /*00*/ __u8 h_signature[8];            /* Signature for verification */
-       __le64 h_reserved1;
+       struct ocfs2_block_check h_check;       /* Error checking */
 /*10*/ __le16 h_suballoc_slot;         /* Slot suballocator this
                                           extent_header belongs to */
        __le16 h_suballoc_bit;          /* Bit offset in suballocator
                                           block group */
        __le32 h_fs_generation;         /* Must match super block */
        __le64 h_blkno;                 /* Offset on disk, in blocks */
-/*20*/ __le64 h_reserved3;
+/*20*/ __le64 h_reserved1;
        __le64 h_next_leaf_blk;         /* Offset on disk, in blocks,
                                           of next leaf header pointing
                                           to data */
@@ -380,7 +401,7 @@ struct ocfs2_dinode {
                                           belongs to */
        __le16 i_suballoc_bit;          /* Bit offset in suballocator
                                           block group */
-/*10*/ __le32 i_reserved0;
+/*10*/ __le32 i_reserved1;
        __le32 i_clusters;              /* Cluster count */
        __le32 i_uid;                   /* Owner UID */
        __le32 i_gid;                   /* Owning GID */
@@ -399,7 +420,8 @@ struct ocfs2_dinode {
        __le32 i_atime_nsec;
        __le32 i_ctime_nsec;
        __le32 i_mtime_nsec;
-/*70*/ __le64 i_reserved1[9];
+       struct ocfs2_block_check i_check;       /* Error checking */
+/*70*/ __le64 i_reserved2[8];
 /*B8*/ union {
                __le64 i_pad1;          /* Generic way to refer to this
                                           64bit union */
@@ -444,6 +466,22 @@ struct ocfs2_dir_entry {
 } __attribute__ ((packed));
 
 /*
+ * Filesystems with OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK enabled will
+ * have this entry as the first part of each dirblock, except the very first
+ * dirblock, where it follows '.' and '..'.
+ */
+struct ocfs2_dir_check_entry {
+/*00*/ __le64  inode;
+       __le16  rec_len;
+       __u8    name_len;
+       __u8    file_type;
+       __le32  reserved1;
+/*10*/ struct ocfs2_block_check check;
+/*18*/
+/* Actual on-disk length specified by rec_len, but it will always be 0x18 */
+};
+
+/*
  * On disk allocator group structure for OCFS2
  */
 struct ocfs2_group_desc
@@ -462,8 +500,10 @@ struct ocfs2_group_desc
 /*20*/ __le64   bg_parent_dinode;       /* dinode which owns me, in
                                           blocks */
        __le64   bg_blkno;               /* Offset on disk, in blocks */
-/*30*/ __le64   bg_reserved2[2];
+/*30*/ struct ocfs2_block_check bg_check;      /* Error checking */
+       __le64   bg_reserved2;
 /*40*/ __u8    bg_bitmap[0];
+/* Actual on-disk length is one block */
 };
 
 #ifdef __KERNEL__
Index: libocfs2/include/blockcheck.h
===================================================================
--- libocfs2/include/blockcheck.h       (revision 0)
+++ libocfs2/include/blockcheck.h       (revision 1283)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * blockcheck.h
+ *
+ * Checksum and ECC codes for the OCFS2 userspace library.
+ *
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Joel Becker
+ */
+
+#ifndef _BLOCKCHECK_H
+#define _BLOCKCHECK_H
+
+extern void ocfs2_hamming_encode(unsigned char *data, unsigned int d,
+                                uint32_t *parity);
+extern void ocfs2_hamming_fix(unsigned char *data, unsigned int d,
+                             unsigned int fix);
+extern uint32_t crc32_le(uint32_t crc, unsigned char const *p, size_t len);
+extern void ocfs2_block_check_compute(ocfs2_filesys *fs,
+                                     void *data,
+                                     struct ocfs2_block_check *bc);
+extern errcode_t ocfs2_block_check_validate(ocfs2_filesys *fs,
+                                           void *data,
+                                           struct ocfs2_block_check *bc);
+#endif
Index: libocfs2/chain.c
===================================================================
--- libocfs2/chain.c    (revision 1204)
+++ libocfs2/chain.c    (revision 1283)
@@ -28,6 +28,7 @@
 #include <string.h>
 
 #include "ocfs2.h"
+#include "blockcheck.h"
 
 void ocfs2_swap_group_desc(struct ocfs2_group_desc *gd)
 {
@@ -70,9 +71,14 @@ errcode_t ocfs2_read_group_desc(ocfs2_fi
                   strlen(OCFS2_GROUP_DESC_SIGNATURE)))
                goto out;
 
+       ret = ocfs2_block_check_validate(fs, gd, &gd->bg_check);
+       if (ret)
+               goto out;
+
        memcpy(gd_buf, blk, fs->fs_blocksize);
 
        gd = (struct ocfs2_group_desc *)gd_buf;
+       
        ocfs2_swap_group_desc(gd);
 
        ret = 0;
@@ -105,6 +111,7 @@ errcode_t ocfs2_write_group_desc(ocfs2_f
        gd = (struct ocfs2_group_desc *)blk;
        ocfs2_swap_group_desc(gd);
 
+       ocfs2_block_check_compute(fs, gd, &gd->bg_check);
        ret = io_write_block(fs->fs_io, blkno, 1, blk);
        if (ret)
                goto out;
Index: libocfs2/blockcheck.c
===================================================================
--- libocfs2/blockcheck.c       (revision 0)
+++ libocfs2/blockcheck.c       (revision 1283)
@@ -0,0 +1,308 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * blockcheck.c
+ *
+ * Checksum and ECC codes for the OCFS2 userspace library.
+ *
+ * Copyright (C) 2006 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ *   The 802.3 CRC32 algorithm is copied from the Linux kernel, lib/crc32.c.
+ *   Code was from the public domain, is now GPL, so no real copyright
+ *   attribution other than "The Linux Kernel".  XXX: better text, anyone?
+ */
+
+#define _XOPEN_SOURCE 600 /* Triggers magic in features.h */
+#define _LARGEFILE64_SOURCE
+
+#include <inttypes.h>
+
+#include "ocfs2.h"
+
+#include "bitops.h"
+#include "blockcheck.h"
+
+
+
+/* Count the 1 bits in the low 32 bits of w (SWAR population count). */
+static inline unsigned int hc_hweight32(unsigned int w)
+{
+       w = (w & 0xFFFFFFFF) - ((w >> 1) & 0x55555555);
+       w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+       w = (w + (w >> 4)) & 0x0F0F0F0F;
+       return ((w * 0x01010101) >> 24) & 0xFF;
+}
+
+/*
+ * Conventions used throughout: d = # data bits, p = # parity bits,
+ * c = # total code bits (d + p).
+ *
+ * Returns the smallest p able to protect d data bits with a
+ * single-error-correcting Hamming code, or 0 if more than 31 are needed.
+ */
+static int calc_parity_bits(unsigned int d)
+{
+       unsigned int p;
+
+       /*
+        * Bits required for Single Error Correction are as follows:
+        *
+        * d + p + 1 <= 2^p
+        *
+        * The shift is done on an unsigned constant: (1 << 31) overflows
+        * a signed int, which is undefined behavior.
+        */
+       for (p = 1; p < 32; p++)
+       {
+               if ((d + p + 1) <= (1U << p))
+                       return p;
+       }
+
+       return 0;
+}
+
+/*
+ * Calculate the bit offset in the hamming code buffer based on the bit's
+ * offset in the data buffer.  Since the hamming code reserves all
+ * power-of-two bits for parity, the data bit number and the code bit
+ * number are offset by all the parity bits beforehand.
+ *
+ * Recall that bit numbers in hamming code are 1-based.  This function
+ * takes the 0-based data bit from the caller.
+ *
+ * An example.  Take the first bit of the data buffer (i = 0).  Code bit
+ * 1 is a power of two (2^0), so it's a parity bit.  2 is a power of two
+ * (2^1), so it's a parity bit.  3 is not a power of two.  So the first
+ * bit of the data buffer ends up as bit 3 in the code buffer.
+ */
+static unsigned int calc_code_bit(unsigned int i)
+{
+       unsigned int b, p;
+
+       /*
+        * Data bits are 0-based, but we're talking code bits, which
+        * are 1-based.
+        */
+       b = i + 1;
+
+       /*
+        * For every power of two below our bit number, bump our bit.
+        * We compare with (b + 1) because we have to compare with what b
+        * would be _if_ it were bumped up by the parity bit.  The 1U keeps
+        * the shift and the comparison unsigned.
+        */
+       for (p = 0; (1U << p) < (b + 1); p++)
+               b++;
+
+       return b;
+}
+
+/*
+ * Compute the Hamming parity vector for the first d bits of data.
+ *
+ * data   - buffer holding the data bits
+ * d      - number of data bits to cover
+ * parity - out: bit j is the parity bit for code position 2^j
+ *
+ * The vector is built with shifts on a plain integer, so bit j of *parity
+ * is always the value (1 << j).  abort()s if d needs > 31 parity bits.
+ */
+void ocfs2_hamming_encode(unsigned char *data, unsigned int d,
+                         uint32_t *parity)
+{
+       unsigned int p = calc_parity_bits(d);
+       unsigned int i, j, b;
+       uint32_t vec = 0;
+
+       if (!p)
+               abort();
+
+       /*
+        * b is the hamming code bit number.  Hamming code specifies a
+        * 1-based array, but C uses 0-based.  So 'i' is for C, and 'b' is
+        * for the algorithm.
+        *
+        * The i++ in the for loop is so that the start offset passed
+        * to ocfs2_find_next_bit_set() is one greater than the previously
+        * found bit.
+        */
+       for (i = 0; (i = ocfs2_find_next_bit_set(data, d, i)) < d; i++)
+       {
+               b = calc_code_bit(i);
+
+               for (j = 0; j < p; j++)
+               {
+                       /*
+                        * Data bits in the resultant code are checked by
+                        * parity bits that are part of the bit number
+                        * representation.  Huh?
+                        *
+                        * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
+                        * In other words, the parity bit at position 2^k
+                        * checks bits in positions having bit k set in
+                        * their binary representation.  Conversely, for
+                        * instance, bit 13, i.e. 1101(2), is checked by
+                        * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
+                        * </wikipedia>
+                        *
+                        * Note that 'k' is the _code_ bit number.  'b' in
+                        * our loop.
+                        */
+                       if (b & (1U << j))
+                               vec ^= (1U << j);
+               }
+       }
+
+       *parity = vec;
+}
+
+/*
+ * Flip the data bit named by the syndrome 'fix' (stored parity XOR computed
+ * parity, a 1-based code bit number).  d is the number of data bits.
+ */
+void ocfs2_hamming_fix(unsigned char *data, unsigned int d,
+                      unsigned int fix)
+{
+       unsigned int p = calc_parity_bits(d);
+       unsigned int i, b;
+
+       if (!p)
+               abort();
+
+       /* A zero syndrome is no error; an hweight of 1 is a lone busted
+        * parity bit.  Either way the data needs no change. */
+       if (hc_hweight32(fix) <= 1)
+               return;
+
+       /* See hamming_encode() for a description of 'b' */
+       for (i = 0, b = 1; i < d; i++, b++)
+       {
+               /* Skip past parity bits */
+               while (hc_hweight32(b) == 1)
+                       b++;
+               if (b != fix)
+                       continue;
+               if (ocfs2_test_bit(i, data))
+                       ocfs2_clear_bit(i, data);
+               else
+                       ocfs2_set_bit(i, data);
+               break;
+       }
+}
+
+/*
+ * table-less crc32_le() stolen from the kernel.  The kernel's slowest
+ * version :-)
+ *
+ * RFC 3385 shows that the 802.3 crc32 (this one) has the same properties
+ * and probabilities as crc32c (which iSCSI uses) for data blocks < 2^16
+ * bits.  We fit.
+ */
+
+/*
+ * There are multiple 16-bit CRC polynomials in common use, but this is
+ * *the* standard CRC-32 polynomial, first popularized by Ethernet.
+ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
+ */
+#define CRCPOLY_LE 0xedb88320
+
+/**
+ * crc32_le() - bit-at-a-time, table-less little-endian CRC32
+ * @crc - starting value: ~0 for Ethernet, sometimes 0 for other uses, or
+ *        the result of a previous call when checksumming in pieces.
+ * @p   - first byte of the buffer to checksum
+ * @len - number of bytes at @p
+ */
+uint32_t crc32_le(uint32_t crc, unsigned char const *p, size_t len)
+{
+       unsigned char const *end = p + len;
+       int bit;
+       for (; p != end; p++) {
+               crc ^= *p;
+               for (bit = 8; bit > 0; bit--)
+                       crc = (crc & 1) ? (crc >> 1) ^ CRCPOLY_LE : crc >> 1;
+       }
+       return crc;
+}
+
+/*
+ * This function generates check information for a block: a crc32 over
+ * all fs_blocksize bytes of data and a Hamming ECC over all of its bits,
+ * both stored into bc.  It does nothing unless the superblock has
+ * OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK set.
+ *
+ * bc should be a pointer inside data, as the function will take care of
+ * zeroing it before calculating the check information.  If bc does not
+ * point inside data, the caller must zero any inline ocfs2_block_check.
+ */
+void ocfs2_block_check_compute(ocfs2_filesys *fs, void *data,
+                              struct ocfs2_block_check *bc)
+{
+       uint32_t crc, ecc;
+
+       /* XXX: Caller has already swapped the inode, so this needs
+        * endian fixup */
+       if (!OCFS2_HAS_INCOMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
+                                       OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK))
+               return;
+
+       memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+       /* XXX: I think crc32_le() might need bitreverse() */
+       crc = crc32_le(~0, data, fs->fs_blocksize);
+       /* The Hamming code is sized in bits, the crc32 in bytes */
+       ocfs2_hamming_encode(data, fs->fs_blocksize * 8, &ecc);
+       bc->bc_crc32e = crc;
+       bc->bc_ecc = (uint16_t)ecc;  /* We know it's max 16 bits */
+}
+
+/*
+ * This function validates existing check information.  Like _compute,
+ * it zeroes bc before calculating check codes; the saved values are put
+ * back before returning.  If bc is not a pointer inside data, the caller
+ * must have zeroed any inline ocfs2_block_check structures.
+ * Returns 0 if the crc32 matches, possibly after a single-bit ECC repair
+ * of data, and OCFS2_ET_IO if it still does not.
+ */
+errcode_t ocfs2_block_check_validate(ocfs2_filesys *fs, void *data,
+                                    struct ocfs2_block_check *bc)
+{
+       struct ocfs2_block_check check = *bc;
+       unsigned int d = fs->fs_blocksize * 8;  /* hamming works in bits */
+       errcode_t ret = 0;
+       uint32_t ecc;
+
+       /* XXX: Caller has already swapped the inode, so this needs
+        * endian fixup */
+       if (!OCFS2_HAS_INCOMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
+                                       OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK))
+               return 0;
+
+       memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+       /* Fast path - if the crc32 validates, we're good to go */
+       if (crc32_le(~0, data, fs->fs_blocksize) != check.bc_crc32e) {
+               /* Ok, try ECC fixups, then check the crc32 again */
+               ocfs2_hamming_encode(data, d, &ecc);
+               ocfs2_hamming_fix(data, d, ecc ^ check.bc_ecc);
+               if (crc32_le(~0, data, fs->fs_blocksize) != check.bc_crc32e)
+                       ret = OCFS2_ET_IO;
+       }
+
+       *bc = check;    /* restore the check values that were read */
+       return ret;
+}
Index: libocfs2/dirblock.c
===================================================================
--- libocfs2/dirblock.c (revision 1204)
+++ libocfs2/dirblock.c (revision 1283)
@@ -31,6 +31,7 @@
 #include <string.h>
 
 #include "ocfs2.h"
+#include "blockcheck.h"
 
 static void ocfs2_swap_dir_entry(struct ocfs2_dir_entry *dirent)
 {
@@ -87,11 +88,17 @@ errcode_t ocfs2_read_dir_block(ocfs2_fil
                                void *buf)
 {
        errcode_t       retval;
+       struct ocfs2_dir_check_entry *dc;
 
        retval = io_read_block(fs->fs_io, block, 1, buf);
        if (retval)
                return retval;
 
+       dc = (struct ocfs2_dir_check_entry *)buf;
+       retval = ocfs2_block_check_validate(fs, buf, &dc->check);
+       if (retval)
+               return retval;
+
        return ocfs2_swap_dir_entries_to_cpu(buf, fs->fs_blocksize);
 }
 
@@ -100,6 +107,7 @@ errcode_t ocfs2_write_dir_block(ocfs2_fi
 {
        errcode_t       retval;
        char            *buf = NULL;
+       struct ocfs2_dir_check_entry *dc;
 
        retval = ocfs2_malloc_block(fs->fs_io, &buf);
        if (retval)
@@ -111,6 +119,8 @@ errcode_t ocfs2_write_dir_block(ocfs2_fi
        if (retval)
                goto out;
        
+       dc = (struct ocfs2_dir_check_entry *) buf;
+       ocfs2_block_check_compute(fs, buf, &dc->check);
        retval = io_write_block(fs->fs_io, block, 1, buf);
 out:
        ocfs2_free(&buf);
Index: libocfs2/extents.c
===================================================================
--- libocfs2/extents.c  (revision 1204)
+++ libocfs2/extents.c  (revision 1283)
@@ -33,6 +33,7 @@
 #include <inttypes.h>
 
 #include "ocfs2.h"
+#include "blockcheck.h"
 
 static void ocfs2_swap_extent_list_primary(struct ocfs2_extent_list *el)
 {
@@ -126,9 +127,16 @@ errcode_t ocfs2_read_extent_block_nochec
                goto out;
        }
 
+       /* XXX: fsck seems to expect all extent block corruption to be
+        * ignored, so let's ask zach what to do here */
+       ret = ocfs2_block_check_validate(fs, eb, &eb->h_check);
+       if (ret)
+               goto out;
+
        memcpy(eb_buf, blk, fs->fs_blocksize);
 
        eb = (struct ocfs2_extent_block *) eb_buf;
+
        ocfs2_swap_extent_block_to_cpu(eb);
 
 out:
@@ -175,6 +183,7 @@ errcode_t ocfs2_write_extent_block(ocfs2
        eb = (struct ocfs2_extent_block *) blk;
        ocfs2_swap_extent_block_from_cpu(eb);
 
+       ocfs2_block_check_compute(fs, eb, &eb->h_check);
        ret = io_write_block(fs->fs_io, blkno, 1, blk);
        if (ret)
                goto out;
Index: extras/hamming.c
===================================================================
--- extras/hamming.c    (revision 0)
+++ extras/hamming.c    (revision 1283)
@@ -0,0 +1,214 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+
+/* Adler32 test */
+#include <zlib.h>
+
+#include "ocfs2.h"
+
+#include "bitops.h"
+#include "blockcheck.h"
+
+
+/* Fill data with blocksize random bytes, then zero all but the first 0xC0
+ * (when there are more).  Returns 0, or 1 if /dev/urandom can't be read. */
+static int fill_buf(unsigned char *data, int blocksize)
+{
+    int fd;
+    ssize_t ret;
+
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0) {
+        fprintf(stderr, "Unable to open\n");
+        return 1;
+    }
+
+    ret = read(fd, data, blocksize);
+    close(fd);
+    if (ret != blocksize) {
+        fprintf(stderr, "short read\n");
+        return 1;
+    }
+
+    if (blocksize > 0xC0)   /* else the length below would go negative */
+        memset(data + 0xC0, 0, blocksize - 0xC0);
+    return 0;
+}
+
+/* Run the Hamming encoder count times over the block, for time(1). */
+static int time_encode(unsigned char *data, int blocksize, int count)
+{
+    uint32_t scratch;
+    unsigned int nbits = blocksize * 8;
+    int left = count;
+
+    while (left-- > 0)
+        ocfs2_hamming_encode(data, nbits, &scratch);
+
+    return 0;
+}
+
+/* Copy the block byte by byte count times, as a baseline for time(1). */
+static int time_cpy(unsigned char *data, int blocksize, int count)
+{
+    char *copy = malloc(blocksize);
+    int pass, off;
+
+    if (copy == NULL)
+        return 1;
+
+    /*
+     * This is purposely inefficient - memcpy(3) is way fast
+     * ... later ... and still, way fast ...
+     */
+    for (pass = 0; pass < count; pass++) {
+        for (off = 0; off < blocksize; off++)
+            copy[off] = data[off];
+    }
+
+    free(copy);
+    return 0;
+}
+
+/* Checksum the block count times with crc32_le(), for time(1). */
+static int time_crc32(unsigned char *data, int blocksize, int count)
+{
+    unsigned long sum;
+    int left;
+
+    for (left = count; left > 0; left--) {
+#if 0  /* zlib crc32 */
+        sum = crc32(0L, Z_NULL, 0);
+        sum = crc32(sum, (unsigned char *)data, blocksize);
+#endif
+        sum = crc32_le(~0, data, blocksize);
+    }
+
+    return 0;
+}
+
+#ifdef HAVE_ZLIB
+/* Checksum the block count times with zlib's adler32(), for time(1). */
+static int time_adler32(unsigned char *data, int blocksize, int count)
+{
+    unsigned long sum;
+    int left = count;
+
+    while (left-- > 0) {
+        sum = adler32(0L, Z_NULL, 0);
+        sum = adler32(sum, data, blocksize);
+    }
+
+    return 0;
+}
+#endif
+
+
+/*
+ * Flip each bit of the block in turn and check that encode + fix puts it
+ * back.  Prints one line per bit and stops at the first failure.
+ * Returns 0 if every bit passed, 1 otherwise (or if malloc fails).
+ */
+static int test_buf(unsigned char *data, int blocksize)
+{
+    unsigned int nbits = blocksize * 8;
+    uint32_t good, bad;
+    unsigned char *copy;
+    int bit;
+
+    ocfs2_hamming_encode(data, nbits, &good);
+
+    copy = malloc(blocksize);
+    if (copy == NULL)
+        return 1;
+
+    for (bit = 0; (unsigned int)bit < nbits; bit++) {
+        fprintf(stdout, "Testing bit %d... ", bit);
+
+        memcpy(copy, data, blocksize);
+        if (!ocfs2_test_bit(bit, copy))
+            ocfs2_set_bit(bit, copy);
+        else
+            ocfs2_clear_bit(bit, copy);
+
+        /* The twiddled bit should break things */
+        if (memcmp(data, copy, blocksize) == 0) {
+            fprintf(stdout, "Failed\n");
+            fprintf(stderr, "Breaking didn't!\n");
+            break;
+        }
+
+        ocfs2_hamming_encode(copy, nbits, &bad);
+        ocfs2_hamming_fix(copy, nbits, good ^ bad);
+
+        /* Now it should work */
+        if (memcmp(data, copy, blocksize) != 0) {
+            fprintf(stdout, "Failed\n");
+            fprintf(stderr, "Fix didn't!\n");
+            break;
+        }
+
+        fprintf(stdout, "Pass\n");
+    }
+
+    free(copy);
+
+    /* The loop only runs to completion when no bit failed */
+    return ((unsigned int)bit < nbits) ? 1 : 0;
+}
+
+
+/* usage: hamming <blocksize> [test | encode|cpy|adler32|crc32 <count>] */
+int main(int argc, char *argv[])
+{
+    int rc = 1;
+    int blocksize = 0;
+    unsigned char *data;
+
+    if (argc < 2)
+        goto out;
+    blocksize = atoi(argv[1]);
+    if (blocksize <= 0)
+        goto out;
+
+    data = malloc(blocksize);
+    if (!data)
+        goto out;
+
+    if (fill_buf(data, blocksize))
+        goto out_free;
+
+    /* The self-test is the whole run; its result is the exit status */
+    if (argc < 3 || !strcmp(argv[2], "test")) {
+        rc = test_buf(data, blocksize);
+        goto out_free;
+    }
+    if (argc < 4)
+        goto out_free;
+
+    if (!strcmp(argv[2], "encode"))
+        time_encode(data, blocksize, atoi(argv[3]));
+    else if (!strcmp(argv[2], "cpy"))
+        time_cpy(data, blocksize, atoi(argv[3]));
+#ifdef HAVE_ZLIB
+    else if (!strcmp(argv[2], "adler32"))
+        time_adler32(data, blocksize, atoi(argv[3]));
+#endif
+    else if (!strcmp(argv[2], "crc32"))
+        time_crc32(data, blocksize, atoi(argv[3]));
+    else
+        goto out_free;
+    rc = 0;
+
+out_free:
+    free(data);
+out:
+    return rc;
+}
Index: extras/Makefile
===================================================================
--- extras/Makefile     (revision 1204)
+++ extras/Makefile     (revision 1283)
@@ -11,7 +11,7 @@ endif
 
 CFLAGS = $(OPTS) $(WARNINGS) 
 
-UNINST_PROGRAMS = find_hardlinks find_dup_extents find_inode_paths set_random_bits decode_lockres encode_lockres mark_journal_dirty
+UNINST_PROGRAMS = find_hardlinks find_dup_extents find_inode_paths set_random_bits decode_lockres encode_lockres mark_journal_dirty hamming
 
 INCLUDES = -I../libocfs2/include -I$(TOPDIR)/libo2dlm/include -I$(TOPDIR)/libo2cb/include
 
@@ -28,8 +28,13 @@ SET_RANDOM_BITS_CFILES = set_random_bits
 DECODE_LOCKRES_CFILES = decode_lockres.c
 ENCODE_LOCKRES_CFILES = encode_lockres.c
 MARK_JOURNAL_DIRTY_CFILES = mark_journal_dirty.c
+HAMMING_CFILES = hamming.c
 
-DIST_FILES = $(FIND_HARDLINKS_CFILES) $(FIND_DUP_EXTENTS_CFILES) $(FIND_INODE_PATHS_CFILES) $(SET_RANDOM_BITS_CFILES) $(DECODE_LOCKRES_CFILES) $(ENCODE_LOCKRES_CFILES) $(MARK_JOURNAL_DIRTY_CFILES)
+ifneq ($(ZLIB_LIBS),)
+  hamming_CFLAGS = -DHAVE_ZLIB
+endif
+
+DIST_FILES = $(FIND_HARDLINKS_CFILES) $(FIND_DUP_EXTENTS_CFILES) $(FIND_INODE_PATHS_CFILES) $(SET_RANDOM_BITS_CFILES) $(DECODE_LOCKRES_CFILES) $(ENCODE_LOCKRES_CFILES) $(MARK_JOURNAL_DIRTY_CFILES) $(HAMMING_CFILES)
 
 FIND_HARDLINKS_OBJS = $(subst .c,.o,$(FIND_HARDLINKS_CFILES))
 FIND_DUP_EXTENTS_OBJS = $(subst .c,.o,$(FIND_DUP_EXTENTS_CFILES))
@@ -38,6 +43,7 @@ SET_RANDOM_BITS_OBJS  = $(subst .c,.o,$(
 DECODE_LOCKRES_OBJS  = $(subst .c,.o,$(DECODE_LOCKRES_CFILES))
 ENCODE_LOCKRES_OBJS  = $(subst .c,.o,$(ENCODE_LOCKRES_CFILES))
 MARK_JOURNAL_DIRTY_OBJS = $(subst .c,.o,$(MARK_JOURNAL_DIRTY_CFILES))
+HAMMING_OBJS = $(subst .c,.o,$(HAMMING_CFILES))
 LIBOCFS2 = ../libocfs2/libocfs2.a
 EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS)
 
@@ -62,4 +68,7 @@ encode_lockres: $(ENCODE_LOCKRES_OBJS) $
 mark_journal_dirty: $(MARK_JOURNAL_DIRTY_OBJS) $(LIBOCFS2)
        $(LINK) $(EXTRAS_LIBS)
 
+hamming: $(HAMMING_OBJS) $(LIBOCFS2)
+       $(LINK) $(EXTRAS_LIBS) $(ZLIB_LIBS)
+
 include $(TOPDIR)/Postamble.make

Property changes on: extras
___________________________________________________________________
Name: svn:ignore
   - .*.sw?
find_hardlinks
find_dup_extents
find_inode_paths
set_random_bits
decode_lockres
mark_journal_dirty
encode_lockres
*.d

   + .*.sw?
find_hardlinks
find_dup_extents
find_inode_paths
set_random_bits
decode_lockres
mark_journal_dirty
encode_lockres
hamming
*.d


Index: tunefs.ocfs2/tunefs.c
===================================================================
--- tunefs.ocfs2/tunefs.c       (revision 1204)
+++ tunefs.ocfs2/tunefs.c       (revision 1283)
@@ -45,13 +45,14 @@
 #include <ctype.h>
 #include <signal.h>
 
-#include <ocfs2.h>
-#include <ocfs2_fs.h>
-#include <ocfs1_fs_compat.h>
+#include "ocfs2.h"
+#include "ocfs2_fs.h"
+#include "ocfs1_fs_compat.h"
+#include "bitops.h"
 
-#include <jbd.h>
+#include "jbd.h"
 
-#include <kernel-list.h>
+#include "kernel-list.h"
 
 #define SYSTEM_FILE_NAME_MAX   40
 
@@ -74,6 +75,8 @@ typedef struct _ocfs2_tune_opts {
        int prompt;
        time_t tune_time;
        int fd;
+       uint32_t feature_incompat;
+       uint32_t no_feature_incompat;
 } ocfs2_tune_opts;
 
 static ocfs2_tune_opts opts;
@@ -82,11 +85,11 @@ static int cluster_locked = 0;
 
 static void usage(const char *progname)
 {
-       fprintf(stderr, "usage: %s [-N number-of-node-slots] "
-                       "[-L volume-label]\n"
-                       "\t[-J journal-options] [-S volume-size] [-qvV] "
-                       "device\n",
-                       progname);
+       /* "%*s" consumes an int width; strlen() returns size_t, so cast it. */
+       fprintf(stderr, "usage: %s [-N number-of-node-slots] [-L volume-label]\n"
+               "       %*s [-J journal-options] [-S volume-size] [-O [^]feature[,...]]\n"
+               "       %*s [-qvV] device \n",
+               progname, (int)strlen(progname), " ", (int)strlen(progname), " ");
        exit(0);
 }
 
@@ -117,12 +120,12 @@ static void handle_signal(int sig)
 /* Call this with SIG_BLOCK to block and SIG_UNBLOCK to unblock */
 static void block_signals(int how)
 {
-     sigset_t sigs;
+        sigset_t sigs;
 
-     sigfillset(&sigs);
-     sigdelset(&sigs, SIGTRAP);
-     sigdelset(&sigs, SIGSEGV);
-     sigprocmask(how, &sigs, NULL);
+        sigfillset(&sigs);
+        sigdelset(&sigs, SIGTRAP);
+        sigdelset(&sigs, SIGSEGV);
+        sigprocmask(how, &sigs, NULL);
 }
 
 static int get_number(char *arg, uint64_t *res)
@@ -167,6 +170,51 @@ static int get_number(char *arg, uint64_
        return 0;
 }
 
+/*
+ * Single home for the "feature name" -> "feature field" mapping.  A nonzero
+ * yes targets opts.feature_incompat, zero targets opts.no_feature_incompat.
+ */
+static void set_feature(char *feature, int yes)
+{
+       uint32_t *mask = &opts.no_feature_incompat;
+
+       if (yes)
+               mask = &opts.feature_incompat;
+
+       if (strcmp(feature, "check") != 0) {
+               fprintf(stderr, "%s: Invalid feature: %s\n",
+                       opts.progname, feature);
+               exit(1);
+       }
+       ocfs2_set_bit(OCFS2_FEATURE_INCOMPAT_BLOCK_CHECK, mask);
+}
+
+/* Split a comma-separated list; '^name' is passed to set_feature() with 0. */
+static void parse_features(const char *features)
+{
+       char *buf, *p, *n;
+
+       /* Keep the strdup() base pointer: p walks the list, buf is freed. */
+       p = buf = strdup(features);
+       if (!buf) {
+               fprintf(stderr, "%s: Unable to allocate memory\n",
+                       opts.progname);
+               exit(1);
+       }
+
+       for (; p; p = n) {
+               n = strchr(p, ',');
+               if (n)
+                       *n++ = '\0';
+
+               if (p[0] == '^')
+                       set_feature(p + 1, 0);
+               else
+                       set_feature(p, 1);
+       }
+       free(buf);
+}
+
 /* derived from e2fsprogs */
 static void parse_journal_opts(char *progname, const char *opts,
                               uint64_t *journal_size_in_bytes)
@@ -241,8 +289,9 @@ static void get_options(int argc, char *
                { "verbose", 0, 0, 'v' },
                { "quiet", 0, 0, 'q' },
                { "version", 0, 0, 'V' },
-               { "journal-options", 0, 0, 'J'},
-               { "volume-size", 0, 0, 'S'},
+               { "journal-options", 1, 0, 'J'},
+               { "volume-size", 1, 0, 'S'},
+               { "feature", 1, 0, 'O'},
                { 0, 0, 0, 0}
        };
 
@@ -254,7 +303,7 @@ static void get_options(int argc, char *
        opts.prompt = 1;
 
        while (1) {
-               c = getopt_long(argc, argv, "L:N:J:S:vqVx", long_options, 
+               c = getopt_long(argc, argv, "L:N:J:S:O:vqVx", long_options, 
                                NULL);
 
                if (c == -1)
@@ -303,6 +352,10 @@ static void get_options(int argc, char *
                        opts.vol_size = val;
                        break;
 
+                       case 'O':
+                               parse_features(optarg);
+                               break;
+
                case 'v':
                        opts.verbose = 1;
                        break;
@@ -366,8 +419,8 @@ static errcode_t add_slots(ocfs2_filesys
 
                        /* create inode for system file */
                        ret = ocfs2_new_system_inode(fs, &blkno,
-                                                     ocfs2_system_inodes[i].si_mode,
-                                                     ocfs2_system_inodes[i].si_iflags);
+                                                    ocfs2_system_inodes[i].si_mode,
+                                                    ocfs2_system_inodes[i].si_iflags);
                        if (ret)
                                goto bail;
 
Index: configure.in
===================================================================
--- configure.in        (revision 1204)
+++ configure.in        (revision 1283)
@@ -97,6 +97,13 @@ AC_CHECK_HEADER(uuid/uuid.h, :,
   AC_MSG_ERROR([Unable to find uuid headers]))
 AC_SUBST(UUID_LIBS)
 
+ZLIB_LIBS=
+AC_CHECK_LIB(z, adler32, ZLIB_LIBS=-lz)
+if test "x$ZLIB_LIBS" != "x"; then
+  AC_CHECK_HEADER(zlib.h, :, ZLIB_LIBS=)
+fi
+AC_SUBST(ZLIB_LIBS)
+
 AC_MSG_CHECKING(for debug executables)
 AC_ARG_ENABLE(debugexe, [  --enable-debugexe=[yes/no]     Enable debug executables for library source files [default=no]],,enable_debugexe=no)
 OCFS2_DEBUG_EXE=

ocfs2-tools-ecc.patch


2011-12-23 01:01