[Ocfs2-devel] Patch for hash.c
John L. Villalovos
john.l.villalovos at intel.com
Mon Apr 26 14:48:58 CDT 2004
Going through hash.c to figure things out. Here is a patch with comments. Where you see FIXME: are places where I think the code needs fixing.
One thing I noticed is that it seems like in ocfs_bh_sem_hash_prune() that the logic to have it do a mandatory prune every 10 times through is broken. I put a comment in that function where to me it seems like it is not working as intended.
I also changed a goto statement into a do/while loop in ocfs_bh_sem_hash_prune_all()
John
Index: hash.c
===================================================================
--- hash.c (revision 867)
+++ hash.c (working copy)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noet sw=8 ts=8 ai tw=80 sts=0:
+ *
* hash.c
*
* lockid hash, bh sem hash, inode hash
@@ -9,12 +11,12 @@
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
@@ -687,19 +689,36 @@
#define ocfs_bh_sem_hash_fn(_b) \
(_hashfn((unsigned int)BH_GET_DEVICE((_b)), (_b)->b_blocknr) & ocfs_bh_hash_shift)
+/*
+ * ocfs_bh_sem_hash_init()
+ *
+ * Setup our buffer head semaphore hash table.
+ *
+ * The purpose of the buffer head semaphore hash table is to store semaphores
+ * for our buffer heads. In order to help speed up the performance of finding
+ * the correct semaphore for our buffer heads we use a hash. The way this is
+ * done is we create a table that has "buckets" of semaphores. Each bucket
+ * contains a list of semaphores for our buffer heads which all have the same
+ * hash value. When we need to find the correct semaphore for a buffer head we
+ * call the ocfs_bh_sem_lookup() function.
+ */
int ocfs_bh_sem_hash_init()
{
int i, ret;
spin_lock_init (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Get 4 ( = pow(2, 2) ) pages of memory. */
OcfsGlobalCtxt.bh_sem_hash = (struct list_head *)__get_free_pages(GFP_KERNEL, 2);
if (!OcfsGlobalCtxt.bh_sem_hash) {
LOG_ERROR_STR("ENOMEM allocating ocfs_bh_sem_hash");
ret = -ENOMEM;
goto bail;
}
+ /* Keep track of how many buckets we have spots for in our hash table
+ * */
OcfsGlobalCtxt.bh_sem_hash_sz = (PAGE_SIZE * 4) / sizeof(struct list_head);
-
+ /* Setup all our buckets, so that they have properly initialized lists
+ * */
for (i=OcfsGlobalCtxt.bh_sem_hash_sz-1; i>=0; i--)
INIT_LIST_HEAD(&OcfsGlobalCtxt.bh_sem_hash[i]);
@@ -728,15 +747,25 @@
return 0;
}
-
+/*
+ * ocfs_bh_sem_lookup()
+ *
+ * Lookup the semaphore for our buffer head. If one does NOT exist then we
+ * will create it and add it to the buffer head semaphore hash table.
+ */
ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh)
{
int depth, bucket;
struct list_head *head, *iter = NULL;
ocfs_bh_sem *sem = NULL, *newsem = NULL;
+ /* Figure out which bucket our buffer head is in and then get the list
+ * of semaphores for that bucket */
bucket = ocfs_bh_sem_hash_fn(bh);
head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+/* We will jump to this point after we add a new semaphore for an entry that
+ * was not prexisting in the hash table */
again:
depth = 0;
spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
@@ -744,11 +773,20 @@
list_for_each(iter, head) {
if (++depth > OCFS_BH_SEM_HASH_PRUNE_TRIGGER) {
/* Grandma, what a long list you have? */
+ /* Make this list the one that should be pruned next
+ * time pruning is done. */
atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, bucket);
}
sem = list_entry (iter, ocfs_bh_sem, s_list);
+ /* Is this the correct semaphore for our buffer head? */
if (sem->s_blocknr == bh->b_blocknr &&
sem->s_dev == BH_GET_DEVICE(bh)) {
+ /* We have found our semaphore. */
+
+ /* We want to make sure that if nobody is currently
+ * using the semaphore that there should be no buffer
+ * head associated with it. If it isn't being used
+ * then we can use it for our buffer head */
if (atomic_read(&sem->s_refcnt)==0) {
if (sem->s_bh) {
LOG_ERROR_STR("refcount was zero but s_bh not NULL!");
@@ -757,6 +795,9 @@
get_bh(bh);
sem->s_bh = bh;
}
+ /* Make sure that the buffer head associated with the
+ * semaphore is our buffer head. If it isn't then
+ * something went wrong somewhere :( */
if (sem->s_bh != bh) {
LOG_ERROR_ARGS("bad bh_sem->bh: sem(%p,%lu,%lu), new(%p,%lu)\n",
sem->s_bh, sem->s_bh ? sem->s_bh->b_blocknr : 0,
@@ -768,8 +809,13 @@
sem = NULL;
}
+/* TODO: Should "again:" be moved here? Or should the logic of this be moved
+ * down below? */
if (newsem && !sem) {
- /* second pass, we are first to insert */
+ /* We have gone through the list a second time and did not find
+ * our list item. Though it seem obvious that we wouldn't find
+ * it in the second pass since we didn't add it to the list.
+ * So not sure why we go through the list a second time?? */
sem = newsem;
list_add(&sem->s_list, head);
get_bh(bh);
@@ -777,10 +823,14 @@
}
if (sem) {
- /* found something on first or second pass */
+ /* We have found a match, either on the first time through or
+ * we created a semaphore and added it to the list on the
+ * second time through. */
+
ocfs_bh_sem_get(sem);
if (newsem != sem) {
- /* if not just added, mru to front */
+ /* if it wasn't just added, the Most Recently Used goes
+ * to the front of the list */
list_del(&sem->s_list);
list_add(&sem->s_list, head);
}
@@ -788,17 +838,25 @@
// sem->s_bh->b_blocknr,
// buffer_modified(sem->s_bh) ? "true" : "false",
// sem->s_pid);
-
+
if (buffer_modified(sem->s_bh) && sem->s_pid == 0) {
LOG_ERROR_ARGS("found a%s sem with a modified bh but no pid!!! (block=%d)\n",
newsem != sem ? "n old" : " new",
sem->s_bh->b_blocknr);
}
} else {
- /* first pass. not found. do alloc */
+ /* We went through the list and did NOT find a match for our
+ * buffer head. So now we need to create a new semaphore and
+ * then go back and go through the list again. */
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Create our new semaphore */
newsem = ocfs_bh_sem_alloc();
if (newsem) {
+ /* Setup the new semaphore. When a new semaphore is
+ * created we don't yet assign our buffer head to it.
+ * We just set the refcnt to 0 and set the block # and
+ * device. The buffer head will get put into place
+ * above. */
newsem->s_bh = NULL;
atomic_set(&newsem->s_refcnt, 0);
newsem->s_blocknr = bh->b_blocknr;
@@ -809,8 +867,12 @@
#ifdef BH_SEM_DEBUG
memset(&newsem->s_modifier[0], 0, 40);
#endif
+ /* Go back and look up the semaphore again. It is
+ * there now since we have just created it */
goto again;
}
+ /* ERROR. Shouldn't get here. If we did then we failed to
+ * allocate a semaphore. */
sem = NULL;
goto bail;
}
@@ -822,7 +884,7 @@
ocfs_bh_sem_free(newsem);
}
-bail:
+bail:
return sem;
}
@@ -919,8 +981,18 @@
}
-/* returns number of pruned entries */
-int ocfs_bh_sem_hash_prune()
+/*
+ * ocfs_bh_sem_hash_prune()
+ *
+ * This function will prune the semaphore hash list. If
+ * OcfsGlobalCtxt.bh_sem_hash_target_bucket is a valid bucket then the list
+ * contained in that bucket will be pruned. Otherwise if we have gone through
+ * this function more than 10 times without doing a mandatory prune then we will
+ * do a prune of a random bucket.
+ *
+ * returns number of pruned entries
+ */
+int ocfs_bh_sem_hash_prune(void)
{
unsigned int bucket;
int pruned = 0, mandatory=0;
@@ -928,16 +1000,24 @@
ocfs_bh_sem *sem = NULL;
LIST_HEAD(tmp);
+ /* Every 10 times through this function we will force a prune to be done
+ * */
atomic_inc(&OcfsGlobalCtxt.bh_sem_hash_num_iters);
if (atomic_read(&OcfsGlobalCtxt.bh_sem_hash_num_iters) > 10) {
mandatory = 1;
atomic_sub(10, &OcfsGlobalCtxt.bh_sem_hash_num_iters);
}
- /* The better to prune you with, my dear! */
+ /* Get the bucket that we are supposed to prune. If it is -1 then we
+ * don't have any bucket specified. In that case, if we have gone
+ * through 10 or more times time through this function without a
+ * mandatory prune then we will randomly pick a bucket and do a prune on
+ * it */
bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
if (bucket == -1) {
if (mandatory) {
+ /* Pruning is mandatory. Pick a random bucket and prune
+ * it. */
get_random_bytes(&bucket, sizeof(bucket));
bucket %= OcfsGlobalCtxt.bh_sem_hash_sz;
} else {
@@ -948,6 +1028,8 @@
spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* FIXME: BUG: This seems to kill the mandatory pruning that is setup
+ * above */
bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
if (bucket == -1) {
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
@@ -958,7 +1040,10 @@
head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
pruned = 0;
- /* run in lru order */
+ /* We need to get the Least Recently Used (LRU) entries from our list.
+ * We look for entries that are not being used and then delete them from
+ * the list and move them into a temporary list that we will use later
+ * to actually delete the semaphores from memory. */
list_for_each_prev_safe(iter, tmpiter, head) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
if (atomic_read(&sem->s_refcnt) < 1) {
@@ -973,6 +1058,8 @@
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Go through the list of unused semaphores and free the memory being
+ * used by each semaphore */
list_for_each_safe(iter, tmpiter, &tmp) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
if (sem->s_bh) {
@@ -1034,7 +1121,18 @@
return found;
} /* ocfs_bh_sem_hash_cleanup_pid() */
-/* returns number of missed entries */
+/* ocfs_bh_sem_hash_prune_all()
+ *
+ * Goes through and removes all semaphores in the semaphore hash table which are
+ * not being used.
+ *
+ * - Returns number of missed entries. Missed entries are ones that are still
+ * - being referenced and were not able to be pruned.
+ *
+ * This function has an assumption that all the entries should be available to
+ * be removed. If it finds some which are not available to be removed it will
+ * log that information.
+ */
int ocfs_bh_sem_hash_prune_all()
{
int bucket, missed;
@@ -1045,39 +1143,46 @@
missed = 0;
bucket = 0;
spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Turn off pruning for the ocfs_bh_sem_hash_prune() function */
atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
-again:
- head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
- /* run in lru order */
- list_for_each_prev_safe(iter, tmpiter, head) {
- sem = list_entry (iter, ocfs_bh_sem, s_list);
- if (atomic_read(&sem->s_refcnt) < 1) {
- if (sem->s_bh) {
- LOG_ERROR_ARGS("killing off bh_sem with bh still attached! block=%lu, count=%d\n",
- sem->s_bh->b_blocknr, atomic_read(&sem->s_bh->b_count));
+ /* Go through every bucket and move all the semaphores which are not
+ * being used to a temporary list. Keep a count of all the semaphores
+ * which are still being used and thus can not be moved. */
+ do {
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+ /* run in lru order */
+ list_for_each_prev_safe(iter, tmpiter, head) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (atomic_read(&sem->s_refcnt) < 1) {
+ if (sem->s_bh) {
+ LOG_ERROR_ARGS("killing off bh_sem with bh still attached! block=%lu, count=%d\n",
+ sem->s_bh->b_blocknr, atomic_read(&sem->s_bh->b_count));
+ }
+ list_del(&sem->s_list);
+ list_add(&sem->s_list, &tmp);
+ } else {
+ missed++;
+ LOG_TRACE_ARGS("missed block %lu, refcount %u, "
+ "pid = %u\n",
+ sem->s_blocknr,
+ sem->s_refcnt,
+ sem->s_pid);
}
- list_del(&sem->s_list);
- list_add(&sem->s_list, &tmp);
- } else {
- missed++;
- LOG_TRACE_ARGS("missed block %lu, refcount %u, "
- "pid = %u\n",
- sem->s_blocknr,
- sem->s_refcnt,
- sem->s_pid);
}
- }
+ } while ( ++bucket < OcfsGlobalCtxt.bh_sem_hash_sz );
- if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
- goto again;
-
LOG_TRACE_ARGS("finished pruning, missed %d entries\n", missed);
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Go through our temporary list of semaphores which are unused and
+ * release the memory associated with them */
list_for_each_safe(iter, tmpiter, &tmp) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
+ /* FIXME: REDUNDANT: This seems to duplicate the error message
+ * printed above for the same condition */
if (sem->s_bh) {
LOG_ERROR_STR("s_bh is NOT NULL");
BUG();
@@ -1868,5 +1973,3 @@
return(inode);
} /* ocfs_get_inode_from_offset */
-
-
-------------- next part --------------
Index: hash.c
===================================================================
--- hash.c (revision 867)
+++ hash.c (working copy)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noet sw=8 ts=8 ai tw=80 sts=0:
+ *
* hash.c
*
* lockid hash, bh sem hash, inode hash
@@ -9,12 +11,12 @@
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
@@ -687,19 +689,36 @@
#define ocfs_bh_sem_hash_fn(_b) \
(_hashfn((unsigned int)BH_GET_DEVICE((_b)), (_b)->b_blocknr) & ocfs_bh_hash_shift)
+/*
+ * ocfs_bh_sem_hash_init()
+ *
+ * Setup our buffer head semaphore hash table.
+ *
+ * The purpose of the buffer head semaphore hash table is to store semaphores
+ * for our buffer heads. In order to help speed up the performance of finding
+ * the correct semaphore for our buffer heads we use a hash. The way this is
+ * done is we create a table that has "buckets" of semaphores. Each bucket
+ * contains a list of semaphores for our buffer heads which all have the same
+ * hash value. When we need to find the correct semaphore for a buffer head we
+ * call the ocfs_bh_sem_lookup() function.
+ */
int ocfs_bh_sem_hash_init()
{
int i, ret;
spin_lock_init (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Get 4 ( = pow(2, 2) ) pages of memory. */
OcfsGlobalCtxt.bh_sem_hash = (struct list_head *)__get_free_pages(GFP_KERNEL, 2);
if (!OcfsGlobalCtxt.bh_sem_hash) {
LOG_ERROR_STR("ENOMEM allocating ocfs_bh_sem_hash");
ret = -ENOMEM;
goto bail;
}
+ /* Keep track of how many buckets we have spots for in our hash table
+ * */
OcfsGlobalCtxt.bh_sem_hash_sz = (PAGE_SIZE * 4) / sizeof(struct list_head);
-
+ /* Setup all our buckets, so that they have properly initialized lists
+ * */
for (i=OcfsGlobalCtxt.bh_sem_hash_sz-1; i>=0; i--)
INIT_LIST_HEAD(&OcfsGlobalCtxt.bh_sem_hash[i]);
@@ -728,15 +747,25 @@
return 0;
}
-
+/*
+ * ocfs_bh_sem_lookup()
+ *
+ * Lookup the semaphore for our buffer head. If one does NOT exist then we
+ * will create it and add it to the buffer head semaphore hash table.
+ */
ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh)
{
int depth, bucket;
struct list_head *head, *iter = NULL;
ocfs_bh_sem *sem = NULL, *newsem = NULL;
+ /* Figure out which bucket our buffer head is in and then get the list
+ * of semaphores for that bucket */
bucket = ocfs_bh_sem_hash_fn(bh);
head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+/* We will jump to this point after we add a new semaphore for an entry that
+ * was not prexisting in the hash table */
again:
depth = 0;
spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
@@ -744,11 +773,20 @@
list_for_each(iter, head) {
if (++depth > OCFS_BH_SEM_HASH_PRUNE_TRIGGER) {
/* Grandma, what a long list you have? */
+ /* Make this list the one that should be pruned next
+ * time pruning is done. */
atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, bucket);
}
sem = list_entry (iter, ocfs_bh_sem, s_list);
+ /* Is this the correct semaphore for our buffer head? */
if (sem->s_blocknr == bh->b_blocknr &&
sem->s_dev == BH_GET_DEVICE(bh)) {
+ /* We have found our semaphore. */
+
+ /* We want to make sure that if nobody is currently
+ * using the semaphore that there should be no buffer
+ * head associated with it. If it isn't being used
+ * then we can use it for our buffer head */
if (atomic_read(&sem->s_refcnt)==0) {
if (sem->s_bh) {
LOG_ERROR_STR("refcount was zero but s_bh not NULL!");
@@ -757,6 +795,9 @@
get_bh(bh);
sem->s_bh = bh;
}
+ /* Make sure that the buffer head associated with the
+ * semaphore is our buffer head. If it isn't then
+ * something went wrong somewhere :( */
if (sem->s_bh != bh) {
LOG_ERROR_ARGS("bad bh_sem->bh: sem(%p,%lu,%lu), new(%p,%lu)\n",
sem->s_bh, sem->s_bh ? sem->s_bh->b_blocknr : 0,
@@ -768,8 +809,13 @@
sem = NULL;
}
+/* TODO: Should "again:" be moved here? Or should the logic of this be moved
+ * down below? */
if (newsem && !sem) {
- /* second pass, we are first to insert */
+ /* We have gone through the list a second time and did not find
+ * our list item. Though it seem obvious that we wouldn't find
+ * it in the second pass since we didn't add it to the list.
+ * So not sure why we go through the list a second time?? */
sem = newsem;
list_add(&sem->s_list, head);
get_bh(bh);
@@ -777,10 +823,14 @@
}
if (sem) {
- /* found something on first or second pass */
+ /* We have found a match, either on the first time through or
+ * we created a semaphore and added it to the list on the
+ * second time through. */
+
ocfs_bh_sem_get(sem);
if (newsem != sem) {
- /* if not just added, mru to front */
+ /* if it wasn't just added, the Most Recently Used goes
+ * to the front of the list */
list_del(&sem->s_list);
list_add(&sem->s_list, head);
}
@@ -788,17 +838,25 @@
// sem->s_bh->b_blocknr,
// buffer_modified(sem->s_bh) ? "true" : "false",
// sem->s_pid);
-
+
if (buffer_modified(sem->s_bh) && sem->s_pid == 0) {
LOG_ERROR_ARGS("found a%s sem with a modified bh but no pid!!! (block=%d)\n",
newsem != sem ? "n old" : " new",
sem->s_bh->b_blocknr);
}
} else {
- /* first pass. not found. do alloc */
+ /* We went through the list and did NOT find a match for our
+ * buffer head. So now we need to create a new semaphore and
+ * then go back and go through the list again. */
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Create our new semaphore */
newsem = ocfs_bh_sem_alloc();
if (newsem) {
+ /* Setup the new semaphore. When a new semaphore is
+ * created we don't yet assign our buffer head to it.
+ * We just set the refcnt to 0 and set the block # and
+ * device. The buffer head will get put into place
+ * above. */
newsem->s_bh = NULL;
atomic_set(&newsem->s_refcnt, 0);
newsem->s_blocknr = bh->b_blocknr;
@@ -809,8 +867,12 @@
#ifdef BH_SEM_DEBUG
memset(&newsem->s_modifier[0], 0, 40);
#endif
+ /* Go back and look up the semaphore again. It is
+ * there now since we have just created it */
goto again;
}
+ /* ERROR. Shouldn't get here. If we did then we failed to
+ * allocate a semaphore. */
sem = NULL;
goto bail;
}
@@ -822,7 +884,7 @@
ocfs_bh_sem_free(newsem);
}
-bail:
+bail:
return sem;
}
@@ -919,8 +981,18 @@
}
-/* returns number of pruned entries */
-int ocfs_bh_sem_hash_prune()
+/*
+ * ocfs_bh_sem_hash_prune()
+ *
+ * This function will prune the semaphore hash list. If
+ * OcfsGlobalCtxt.bh_sem_hash_target_bucket is a valid bucket then the list
+ * contained in that bucket will be pruned. Otherwise if we have gone through
+ * this function more than 10 times without doing a mandatory prune then we will
+ * do a prune of a random bucket.
+ *
+ * returns number of pruned entries
+ */
+int ocfs_bh_sem_hash_prune(void)
{
unsigned int bucket;
int pruned = 0, mandatory=0;
@@ -928,16 +1000,24 @@
ocfs_bh_sem *sem = NULL;
LIST_HEAD(tmp);
+ /* Every 10 times through this function we will force a prune to be done
+ * */
atomic_inc(&OcfsGlobalCtxt.bh_sem_hash_num_iters);
if (atomic_read(&OcfsGlobalCtxt.bh_sem_hash_num_iters) > 10) {
mandatory = 1;
atomic_sub(10, &OcfsGlobalCtxt.bh_sem_hash_num_iters);
}
- /* The better to prune you with, my dear! */
+ /* Get the bucket that we are supposed to prune. If it is -1 then we
+ * don't have any bucket specified. In that case, if we have gone
+ * through 10 or more times time through this function without a
+ * mandatory prune then we will randomly pick a bucket and do a prune on
+ * it */
bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
if (bucket == -1) {
if (mandatory) {
+ /* Pruning is mandatory. Pick a random bucket and prune
+ * it. */
get_random_bytes(&bucket, sizeof(bucket));
bucket %= OcfsGlobalCtxt.bh_sem_hash_sz;
} else {
@@ -948,6 +1028,8 @@
spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* FIXME: BUG: This seems to kill the mandatory pruning that is setup
+ * above */
bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
if (bucket == -1) {
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
@@ -958,7 +1040,10 @@
head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
pruned = 0;
- /* run in lru order */
+ /* We need to get the Least Recently Used (LRU) entries from our list.
+ * We look for entries that are not being used and then delete them from
+ * the list and move them into a temporary list that we will use later
+ * to actually delete the semaphores from memory. */
list_for_each_prev_safe(iter, tmpiter, head) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
if (atomic_read(&sem->s_refcnt) < 1) {
@@ -973,6 +1058,8 @@
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Go through the list of unused semaphores and free the memory being
+ * used by each semaphore */
list_for_each_safe(iter, tmpiter, &tmp) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
if (sem->s_bh) {
@@ -1034,7 +1121,18 @@
return found;
} /* ocfs_bh_sem_hash_cleanup_pid() */
-/* returns number of missed entries */
+/* ocfs_bh_sem_hash_prune_all()
+ *
+ * Goes through and removes all semaphores in the semaphore hash table which are
+ * not being used.
+ *
+ * - Returns number of missed entries. Missed entries are ones that are still
+ * - being referenced and were not able to be pruned.
+ *
+ * This function has an assumption that all the entries should be available to
+ * be removed. If it finds some which are not available to be removed it will
+ * log that information.
+ */
int ocfs_bh_sem_hash_prune_all()
{
int bucket, missed;
@@ -1045,39 +1143,46 @@
missed = 0;
bucket = 0;
spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Turn off pruning for the ocfs_bh_sem_hash_prune() function */
atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
-again:
- head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
- /* run in lru order */
- list_for_each_prev_safe(iter, tmpiter, head) {
- sem = list_entry (iter, ocfs_bh_sem, s_list);
- if (atomic_read(&sem->s_refcnt) < 1) {
- if (sem->s_bh) {
- LOG_ERROR_ARGS("killing off bh_sem with bh still attached! block=%lu, count=%d\n",
- sem->s_bh->b_blocknr, atomic_read(&sem->s_bh->b_count));
+ /* Go through every bucket and move all the semaphores which are not
+ * being used to a temporary list. Keep a count of all the semaphores
+ * which are still being used and thus can not be moved. */
+ do {
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+ /* run in lru order */
+ list_for_each_prev_safe(iter, tmpiter, head) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (atomic_read(&sem->s_refcnt) < 1) {
+ if (sem->s_bh) {
+ LOG_ERROR_ARGS("killing off bh_sem with bh still attached! block=%lu, count=%d\n",
+ sem->s_bh->b_blocknr, atomic_read(&sem->s_bh->b_count));
+ }
+ list_del(&sem->s_list);
+ list_add(&sem->s_list, &tmp);
+ } else {
+ missed++;
+ LOG_TRACE_ARGS("missed block %lu, refcount %u, "
+ "pid = %u\n",
+ sem->s_blocknr,
+ sem->s_refcnt,
+ sem->s_pid);
}
- list_del(&sem->s_list);
- list_add(&sem->s_list, &tmp);
- } else {
- missed++;
- LOG_TRACE_ARGS("missed block %lu, refcount %u, "
- "pid = %u\n",
- sem->s_blocknr,
- sem->s_refcnt,
- sem->s_pid);
}
- }
+ } while ( ++bucket < OcfsGlobalCtxt.bh_sem_hash_sz );
- if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
- goto again;
-
LOG_TRACE_ARGS("finished pruning, missed %d entries\n", missed);
spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ /* Go through our temporary list of semaphores which are unused and
+ * release the memory associated with them */
list_for_each_safe(iter, tmpiter, &tmp) {
sem = list_entry (iter, ocfs_bh_sem, s_list);
+ /* FIXME: REDUNDANT: This seems to duplicate the error message
+ * printed above for the same condition */
if (sem->s_bh) {
LOG_ERROR_STR("s_bh is NOT NULL");
BUG();
@@ -1868,5 +1973,3 @@
return(inode);
} /* ocfs_get_inode_from_offset */
-
-
More information about the Ocfs2-devel
mailing list