[Ocfs2-tools-devel] [PATCH 2/4] defrag.ocfs2: Defrag files

Goldwyn Rodrigues rgoldwyn at gmail.com
Wed Nov 17 09:00:23 PST 2010


Hi Joel,

On Fri, Nov 12, 2010 at 7:22 PM, Joel Becker <Joel.Becker at oracle.com> wrote:
> On Tue, Sep 07, 2010 at 10:49:16PM -0500, Goldwyn Rodrigues wrote:
>> +static errcode_t calc_num_extents(ocfs2_filesys *fs,
>> +             struct ocfs2_extent_list *el,
>> +             uint32_t *ne)
>> +{
>> +     struct ocfs2_extent_block *eb;
>> +     struct ocfs2_extent_rec *rec;
>> +     errcode_t ret = 0;
>> +     char *buf = NULL;
>> +     int i;
>> +     uint32_t clusters;
>> +     uint32_t extents = 0;
>> +
>> +     ret = ocfs2_malloc_block(fs->fs_io, &buf);
>> +     if (ret)
>> +             goto bail;
>> +
>> +     *ne = 0;
>> +     for (i = 0; i < el->l_next_free_rec; ++i) {
>> +             rec = &(el->l_recs[i]);
>> +             clusters = ocfs2_rec_clusters(el->l_tree_depth, rec);
>> +
>> +             /*
>> +              * In a unsuccessful insertion, we may shift a tree
>> +              * add a new branch for it and do no insertion. So we
>> +              * may meet a extent block which have
>> +              * clusters == 0, this should only be happen
>> +              * in the last extent rec. */
>> +             if (!clusters && i == el->l_next_free_rec - 1)
>> +                     break;
>> +
>> +             extents = 1;
>> +
>> +             if (el->l_tree_depth) {
>> +                     memset(buf, 0, fs->fs_blocksize);
>> +
>> +                     ret = ocfs2_read_extent_block(fs, rec->e_blkno, buf);
>> +                     if (ret)
>> +                             goto bail;
>> +
>> +                     eb = (struct ocfs2_extent_block *)buf;
>> +
>> +                     ret = calc_num_extents(fs, &(eb->h_list), &extents);
>> +                     if (ret)
>> +                             goto bail;
>> +
>> +             }
>> +             *ne = *ne + extents;
>> +     }
>> +
>> +bail:
>> +     if (buf)
>> +             ocfs2_free(&buf);
>> +     return ret;
>> +}
>> +
>> +#define MAX_RECS     4096
>> +
>> +struct save_cluster {
>> +     struct {
>> +             uint64_t start;
>> +             uint32_t len;
>> +     } rec[MAX_RECS];
>> +     int num_recs;
>> +};
>
>        Please define constants and structures above all functions.
>

ok

>> +static errcode_t free_extents(ocfs2_filesys *fs, uint32_t len,
>> +             uint64_t start, void *private)
>> +{
>> +     struct save_cluster *sc = (struct save_cluster *)private;
>> +     int i;
>> +     for (i = 0; i < sc->num_recs; i++) {
>> +             if ((start == sc->rec[i].start) && (len == sc->rec[i].len)) {
>> +                     verbosef("Preserved %"PRIu64" l %d\n",
>> +                             sc->rec[i].start, sc->rec[i].len);
>> +                     return 0;
>> +             }
>> +     }
>> +     /* TODO: change call ocfs2_truncate_clusters to incorporate
>> +        refcounting */
>> +     verbosef("Generic free: %"PRIu64" l %d\n", start, len);
>> +     return ocfs2_free_clusters(fs, len, start);
>
>        This TODO is important before inclusion.

Yes, I agree. The patch to find the largest possible extent is also
required, or else defragmentation will not be as successful. I don't
think it has been included so far.

>
>> +static errcode_t copy_clusters(ocfs2_filesys *fs,
>> +             uint64_t from, uint64_t to, int n)
>> +{
>> +     char *buf = NULL;
>> +     int i, c = ocfs2_clusters_in_blocks(fs, 1);
>
>        Don't you mean ocfs2_clusters_to_blocks()?  You're trying to
> figure out how many I/O blocks for one cluster.  Also, 'c' is too short
> a name for this.  Perhaps 'ioblocks'.

ok, perhaps csize.

>
>> +     errcode_t ret = ocfs2_malloc_blocks(fs->fs_io, c, &buf);
>
>        More importantly, given that this is linear, you can allocate
> more than one cluster at a time.  I would say do this in 1MB chunks,
> regardless of cluster size.
>

Yes, I started with that, but when things got complicated, I
simplified this part. I will put that back. If I understand correctly,
this is from the point of view of efficiency.

>> +static errcode_t alloc_clusters(struct defrag_file_context *ctxt)
>> +{
>> +     errcode_t ret;
>> +     int request = ctxt->clusters_left;
>> +
>> +     if (request <= 0) {
>> +             verbosef("Requested: %d\n", request);
>> +             return OCFS2_ET_INVALID_ARGUMENT;
>> +     }
>> +
>> +     ret = ocfs2_new_clusters(ctxt->dst->dst_fs, MIN_REQUEST,
>> +                     ctxt->clusters_left, &ctxt->blkno, &ctxt->c_end);
>> +     verbosef("alloc'd %d clusters req %d/%d at %"PRIu64"\n", ctxt->c_end,
>> +                     request, ctxt->clusters_left, ctxt->blkno);
>
>        ctxt->c_end isn't the end of the range, it is the length of the
> allocation.  I'd say call it c_clusters or c_len.  c_end sounds like it
> should be "c_offset + new_clusters".

Sure, will change this.

>
>> +static int copy_file_data(ocfs2_filesys *fs,
>> +             struct ocfs2_extent_rec *rec,
>> +             int tree_depth, uint32_t ccount, uint64_t ref_blkno,
>> +             int ref_recno, void *private)
>> +{
>> +     errcode_t ret;
>> +     struct defrag_file_context *ctxt =
>> +                             (struct defrag_file_context *)private;
>> +     int nc, left, num_blocks = ocfs2_clusters_to_blocks(fs, 1);
>> +
>> +     verbosef("nc %d offset %d/%d pos %"PRIu64" blkno %"PRIu64
>> +                     " exts %d/%d\n",
>> +                     rec->e_leaf_clusters, ctxt->c_offset, ctxt->c_end,
>> +                     (uint64_t)ctxt->cpos, (uint64_t)ctxt->blkno,
>> +                     ctxt->num_extents, ctxt->orig_extents);
>> +
>> +     /* If our cluster length is more than existing
>> +        file num extents, abort or we'll make it worse! */
>> +     if (ctxt->num_extents > ctxt->orig_extents) {
>> +             verbosef("Extents exceeded orig %d now %d cpos %d\n",
>> +                     ctxt->orig_extents, ctxt->num_extents, ctxt->cpos);
>> +             ctxt->status = DEFRAG_EXTENTS_EXCEEDED;
>> +             return OCFS2_EXTENT_ABORT;
>> +     }
>> +
>> +     /* Handle holes */
>> +     if (ctxt->expected_cpos < rec->e_cpos) {
>> +             verbosef("Hole detected expected/rec cpos %d/%d\n",
>> +                             ctxt->expected_cpos, rec->e_cpos);
>> +
>> +             if (ctxt->c_offset) {
>> +                     verbosef("Adding collected extent rec p %d blk %"
>> +                             PRIu64"l %d\n", ctxt->cpos, ctxt->blkno,
>> +                             ctxt->c_offset);
>> +                     ret = ocfs2_cached_inode_insert_extent(
>> +                             ctxt->new_inode, ctxt->cpos, ctxt->blkno,
>> +                             ctxt->c_offset, 0);
>> +                     if (ret) {
>> +                             com_err(whoami, ret,
>> +                                             "while adding extent rec\n");
>> +                             ctxt->status = DEFRAG_ERROR;
>> +                             return OCFS2_EXTENT_ABORT;
>> +                     }
>> +                     ctxt->num_extents++;
>> +                     ctxt->c_offset = 0;
>> +             }
>
>        Do you trim the other end of this extent ever?  Here's what I
> mean: Let's say you have a file that looks like this:
>
>  MB    0       1       2       3       4       5
>        |extent | hole  |extent | hole  |extent |
>
> When you are at cpos 1, you try to get the largest extent you can find.
> Let's say that's 10MB.  You discover the end of the hole at cpos 2.  You
> insert your 10MB extent at 2, but there is a hole at 3.  Are you ever
> going to trim that extent, or will you leave 10MB allocated at cpos 2?
>

Yes, I call free_clusters() which discards remaining unused clusters
and resets variables in ctxt.

>
> Life's Little Instruction Book #396
>
>        "Never give anyone a fruitcake."
>

Why? We do it all the time for Christmas.


-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list