[Ocfs2-devel] [PATCH] ocfs2: fix dead lock risk when kmalloc failed in dlm_query_region_handler
Andrew Morton
akpm at linux-foundation.org
Thu Mar 20 16:41:12 PDT 2014
On Thu, 20 Mar 2014 12:25:14 +0800 Joseph Qi <joseph.qi at huawei.com> wrote:
> From: Zhonghua Guo <guozhonghua at h3c.com>
>
> In dlm_query_region_handler(), if kmalloc() fails, the function unlocks
> dlm_domain_lock without having locked it first, which can then deadlock.
> This patch adds a flag, domain_locked, to fix this issue.
>
> ...
>
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -1124,6 +1124,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
> char *local = NULL;
> int status = 0;
> int locked = 0;
> + int domain_locked = 0;
>
> qr = (struct dlm_query_region *) msg->buf;
>
> @@ -1140,6 +1141,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
> status = -EINVAL;
>
> spin_lock(&dlm_domain_lock);
> + domain_locked = 1;
> dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen);
> if (!dlm) {
> mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
> @@ -1171,7 +1173,8 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
> bail:
> if (locked)
> spin_unlock(&dlm->spinlock);
> - spin_unlock(&dlm_domain_lock);
> + if (domain_locked)
> + spin_unlock(&dlm_domain_lock);
>
> kfree(local);
That makes the code messier, by adding another local and testing it.
We can instead make the code cleaner by adopting conventional error
unwinding techniques:
--- a/fs/ocfs2/dlm/dlmdomain.c~ocfs2-fix-dead-lock-risk-when-kmalloc-failed-in-dlm_query_region_handler
+++ a/fs/ocfs2/dlm/dlmdomain.c
@@ -1123,7 +1123,6 @@ static int dlm_query_region_handler(stru
struct dlm_ctxt *dlm = NULL;
char *local = NULL;
int status = 0;
- int locked = 0;
qr = (struct dlm_query_region *) msg->buf;
@@ -1132,10 +1131,8 @@ static int dlm_query_region_handler(stru
/* buffer used in dlm_mast_regions() */
local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
- if (!local) {
- status = -ENOMEM;
- goto bail;
- }
+ if (!local)
+ return -ENOMEM;
status = -EINVAL;
@@ -1144,16 +1141,15 @@ static int dlm_query_region_handler(stru
if (!dlm) {
mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
"before join domain\n", qr->qr_node, qr->qr_domain);
- goto bail;
+ goto out_domain_lock;
}
spin_lock(&dlm->spinlock);
- locked = 1;
if (dlm->joining_node != qr->qr_node) {
mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
"but joining node is %d\n", qr->qr_node, qr->qr_domain,
dlm->joining_node);
- goto bail;
+ goto out_dlm_lock;
}
/* Support for global heartbeat was added in 1.1 */
@@ -1163,14 +1159,14 @@ static int dlm_query_region_handler(stru
"but active dlm protocol is %d.%d\n", qr->qr_node,
qr->qr_domain, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
- goto bail;
+ goto out_dlm_lock;
}
status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
-bail:
- if (locked)
- spin_unlock(&dlm->spinlock);
+out_dlm_lock:
+ spin_unlock(&dlm->spinlock);
+out_domain_lock:
spin_unlock(&dlm_domain_lock);
kfree(local);
Please review and test this, then send it back to me?
More information about the Ocfs2-devel
mailing list