[Ocfs2-users] o2net_check_handshake ... advertised net protocol version 11 but 8 is required

Tomas Lavicky tomas.lavicky at racom.eu
Mon Jun 1 02:06:33 PDT 2009


Hi,
we have a two-node HP Proliant DL140 / Ubuntu 8.04 LTS / Heartbeat 2.1.3-2 
cluster with a bunch of KVM virtual guests. Images for the virtual servers are 
stored on two DRBD 8.0.11 / OCFS2 1.3.9 resources in dual-primary mode.
I've been running this cluster for a year and I need to upgrade it now. I can't 
stop the cluster and upgrade both nodes at once, so I moved all virtual servers 
to one node, stopped Heartbeat, OCFS2 and DRBD, and upgraded the other node to 
Ubuntu 8.10. All seems to be well after the reboot, except that I'm not able to 
mount the OCFS2/DRBD partitions on the upgraded node:

~# mount -t ocfs2 /dev/drbd1 /drbd1/
mount.ocfs2: Transport endpoint is not connected while mounting /dev/drbd1 
on /drbd1/. Check 'dmesg' for more information on this error.

Tail of dmesg on the upgraded node:

[329491.075605] (4849,0):o2net_check_handshake:1227 node rrhb-gandalfb (num 0) 
at 10.123.45.18:7777 advertised net protocol version 8 but 11 is required, 
disconnecting
[329495.106007] (7751,0):dlm_request_join:1033 ERROR: status = -107
[329495.106055] (7751,0):dlm_try_to_join_domain:1207 ERROR: status = -107
[329495.106086] (7751,0):dlm_join_domain:1485 ERROR: status = -107
[329495.106164] (7751,0):dlm_register_domain:1732 ERROR: status = -107
[329495.106198] (7751,0):o2cb_cluster_connect:302 ERROR: status = -107
[329495.106229] (7751,0):ocfs2_dlm_init:2756 ERROR: status = -107
[329495.106268] (7751,0):ocfs2_mount_volume:1208 ERROR: status = -107
[329495.106323] ocfs2: Unmounting device (147,1) on (node 0)

And on the non-upgraded node:

[1626792.088584] (4888,0):o2net_check_handshake:1144 node rrhb-gandalfs (num 
1) at 10.123.45.19:7777 advertised net protocol version 11 but 8 is required, 
disconnecting

Configuration files are the same on both nodes:

#/etc/ocfs2/cluster.conf
node:
        ip_port = 7777
        ip_address = 10.123.45.18
        number = 0
        name = rrhb-gandalfb
        cluster = ocfs2
node:
        ip_port = 7777
        ip_address = 10.123.45.19
        number = 1
        name = rrhb-gandalfs
        cluster = ocfs2
cluster:
        node_count = 2
        name = ocfs2

According to this thread 
http://oss.oracle.com/pipermail/ocfs2-users/2008-April/002769.html it could 
be a kernel incompatibility problem. Is there any way to solve it while the 
nodes run different kernels? I need to keep one node running continuously, so 
I'm afraid to upgrade it until the other one is working. Thanks for your advice.

Tomas


Some other investigations:

~# /etc/init.d/o2cb status
Module "configfs": Loaded
Filesystem "configfs": Mounted
Module "ocfs2_nodemanager": Loaded
Module "ocfs2_dlm": Loaded
Module "ocfs2_dlmfs": Loaded
Module "ocfs2_stackglue": Loaded
Filesystem "ocfs2_dlmfs": Mounted
Checking O2CB cluster ocfs2: Online
Heartbeat dead threshold = 61
  Network idle timeout: 120000
  Network keepalive delay: 5000
  Network reconnect delay: 5000
Checking O2CB heartbeat: Not active

~# /etc/init.d/o2cb load

~# strace -ff -o /tmp/out o2cb_ctl -H -n ocfs2 -t cluster -a online=yes

~# cat /tmp/out.7794
execve("/sbin/o2cb_ctl", 
["o2cb_ctl", "-H", "-n", "ocfs2", "-t", "cluster", "-a", "online=yes"], [/* 
17 vars */]) = 0
brk(0)                                  = 0xecd000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f3739f06000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f3739f04000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or 
directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=17967, ...}) = 0
mmap(NULL, 17967, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3739eff000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/usr/lib/libglib-2.0.so.0", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0PQ\1\0\0"..., 832) = 
832
fstat(3, {st_mode=S_IFREG|0644, st_size=802936, ...}) = 0
mmap(NULL, 2900104, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f3739a24000
mprotect(0x7f3739ae7000, 2093056, PROT_NONE) = 0
mmap(0x7f3739ce6000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0xc2000) = 0x7f3739ce6000
mmap(0x7f3739ce8000, 136, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_ANONYMOUS, -1, 0) = 0x7f3739ce8000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libcom_err.so.2", O_RDONLY)  = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\22\0"..., 832) = 
832
fstat(3, {st_mode=S_IFREG|0644, st_size=14496, ...}) = 0
mmap(NULL, 2109808, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f3739820000
mprotect(0x7f3739823000, 2093056, PROT_NONE) = 0
mmap(0x7f3739a22000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0x2000) = 0x7f3739a22000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libc.so.6", O_RDONLY)        = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\345"..., 832) = 
832
fstat(3, {st_mode=S_IFREG|0755, st_size=1502520, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f3739efe000
mmap(NULL, 3609304, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f37394ae000
mprotect(0x7f3739617000, 2093056, PROT_NONE) = 0
mmap(0x7f3739816000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0x168000) = 0x7f3739816000
mmap(0x7f373981b000, 17112, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_ANONYMOUS, -1, 0) = 0x7f373981b000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libpcre.so.3", O_RDONLY)     = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\23\0"..., 832) = 
832
fstat(3, {st_mode=S_IFREG|0644, st_size=165872, ...}) = 0
mmap(NULL, 2261152, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f3739285000
mprotect(0x7f37392ad000, 2093056, PROT_NONE) = 0
mmap(0x7f37394ac000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0x27000) = 0x7f37394ac000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libpthread.so.0", O_RDONLY)  = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320W\0\0"..., 832) = 
832
fstat(3, {st_mode=S_IFREG|0755, st_size=130214, ...}) = 0
mmap(NULL, 2208624, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f3739069000
mprotect(0x7f3739080000, 2093056, PROT_NONE) = 0
mmap(0x7f373927f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0x16000) = 0x7f373927f000
mmap(0x7f3739281000, 13168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_ANONYMOUS, -1, 0) = 0x7f3739281000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f3739efd000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f3739efc000
arch_prctl(ARCH_SET_FS, 0x7f3739efc700) = 0
mprotect(0x7f373927f000, 4096, PROT_READ) = 0
mprotect(0x7f37394ac000, 4096, PROT_READ) = 0
mprotect(0x7f3739816000, 16384, PROT_READ) = 0
mprotect(0x7f3739a22000, 4096, PROT_READ) = 0
mprotect(0x7f3739ce6000, 4096, PROT_READ) = 0
mprotect(0x7f3739f07000, 4096, PROT_READ) = 0
munmap(0x7f3739eff000, 17967)           = 0
set_tid_address(0x7f3739efc790)         = 7794
set_robust_list(0x7f3739efc7a0, 0x18)   = 0
futex(0x7fff41f0866c, 0x81 /* FUTEX_??? */, 1) = 0
rt_sigaction(SIGRTMIN, {0x7f373906e660, [], SA_RESTORER|SA_SIGINFO, 
0x7f37390780f0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7f373906e6f0, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 
0x7f37390780f0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
brk(0)                                  = 0xecd000
brk(0xeee000)                           = 0xeee000
open("/sys/o2cb/interface_revision", O_RDONLY) = 3
read(3, "5\n", 15)                      = 2
read(3, "", 13)                         = 0
close(3)                                = 0
stat("/sys/kernel/config", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
statfs("/sys/kernel/config", {f_type=0x62656570, f_bsize=4096, f_blocks=0, 
f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, 
f_frsize=4096}) = 0
stat("/etc/ocfs2/cluster.conf", {st_mode=S_IFREG|0644, st_size=348, ...}) = 0
open("/etc/ocfs2/cluster.conf", O_RDONLY) = 3
read(3, "#/etc/ocfs2/cluster.conf\nnode:\n "..., 4000) = 348
read(3, "", 4000)                       = 0
close(3)                                = 0
mkdir("/sys/kernel/config/cluster/ocfs2", 0755) = -1 EEXIST (File exists)
uname({sys="Linux", node="rrhb-gandalfs", ...}) = 0
mkdir("/sys/kernel/config/cluster/ocfs2/node/rrhb-gandalfb", 0755) = -1 EEXIST 
(File exists)
uname({sys="Linux", node="rrhb-gandalfs", ...}) = 0
mkdir("/sys/kernel/config/cluster/ocfs2/node/rrhb-gandalfs", 0755) = -1 EEXIST 
(File exists)
exit_group(0)                           = ?



More information about the Ocfs2-users mailing list