[Ocfs2-devel] [PATCH 3/4] re-enable "ocfs2: mount shared volume without ha stack"
Heming Zhao
heming.zhao at suse.com
Sat Jul 30 01:14:10 UTC 2022
the key different between local mount and non-clustered mount:
local mount feature (tunefs.ocfs2 --fs-features=[no]local) can't do
convert job without ha stack. non-clustered mount feature can run
totally without ha stack.
commit 912f655d78c5 ("ocfs2: mount shared volume without ha stack") had
bug, then commit c80af0c250c8f8a3c978aa5aafbe9c39b336b813 reverted it.
Let's give some explain for the issue mentioned by commit c80af0c250c8.
Under Junxiao's call trace, in __ocfs2_find_empty_slot(), the 'if'
accessment is wrong. sl_node_num could be 0 at o2cb env.
with current information, the trigger flow (base on 912f655d78c5):
1>
nodeA with 'node_num = 0' for mounting. it will succeed.
at this time, slotmap extent block will contains es_valid:1 &
es_node_num:0 for nodeA
then ocfs2_update_disk_slot() will write back slotmap info to disk.
2>
then, nodeB with 'node_num = 1' for mounting
this time, osb->node_num is 1 (set by config file), osb->preferred is
OCFS2_INVALID_SLOT (set by ocfs2_parse_options).
ocfs2_find_slot
+ ocfs2_update_slot_info //read slotmap info from disk
| + set si->si_slots[0].es_valid = 1 & si->si_slots[0].sl_node_num = 0
|
+ __ocfs2_node_num_to_slot //will return -ENOENT.
+ __ocfs2_find_empty_slot
+ if ((preferred >= 0) && (preferred < si->si_num_slots))
| fails enter this 'if' for preferred value is OCFS2_INVALID_SLOT
|
+ 'for(i = 0; i < si->si_num_slots; i++)' search slot 0
successfully.
| 'si->si_slots[0].sl_node_num' is false. trigger 'break' condition.
|
+ return slot 0.
it will cause nodeB grab nodeA journal dlm lock, then trigger hung.
How to do for this bug?
This commit re-enabled 912f655d78c5, next commit (add MMP support) will fix
the issue.
Signed-off-by: Heming Zhao <heming.zhao at suse.com>
---
fs/ocfs2/ocfs2.h | 4 +++-
fs/ocfs2/slot_map.c | 46 ++++++++++++++++++++++++++-------------------
fs/ocfs2/super.c | 21 +++++++++++++++++++++
3 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 740b64238312..337527571461 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -277,6 +277,7 @@ enum ocfs2_mount_options
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+ OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -672,7 +673,8 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+ || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index da7718cef735..0b0ae3ebb0cf 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -252,14 +252,16 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid) {
+ if (!si->si_slots[preferred].sl_valid ||
+ !si->si_slots[preferred].sl_node_num) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid) {
+ if (!si->si_slots[i].sl_valid ||
+ !si->si_slots[i].sl_node_num) {
ret = i;
break;
}
@@ -454,24 +456,30 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
- if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (ocfs2_mount_local(osb))
+ /* use slot 0 directly in local mode */
+ slot = 0;
+ else {
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+ * need to warn to the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
- "allocated to this node!\n", slot, osb->dev_str);
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (slot < 0) {
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+ "already allocated to this node!\n",
+ slot, osb->dev_str);
+ }
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 438be028935d..f7298816d8d9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -172,6 +172,7 @@ enum {
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
+ Opt_nocluster,
Opt_err,
};
@@ -205,6 +206,7 @@ static const match_table_t tokens = {
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
+ {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
@@ -616,6 +618,13 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
+ tmp = OCFS2_MOUNT_NOCLUSTER;
+ if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+ ret = -EINVAL;
+ mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+ goto out;
+ }
+
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -856,6 +865,7 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
}
if (ocfs2_userspace_stack(osb) &&
+ !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
@@ -1127,6 +1137,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
+ if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+ !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+ printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+ "without cluster aware mode.\n", osb->dev_str);
+
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
@@ -1437,6 +1452,9 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
+ case Opt_nocluster:
+ mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+ break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1548,6 +1566,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
+ if (opts & OCFS2_MOUNT_NOCLUSTER)
+ seq_printf(s, ",nocluster");
+
return 0;
}
--
2.37.1
More information about the Ocfs2-devel
mailing list