[Ocfs-users] ocfs hung

Jeremy Schneider jer1887 at asugroup.com
Fri Feb 20 17:43:39 CST 2004


correction...

in the info I attached to the last email, node 0 is locked up and node
1 seems to own all the resources node 0 needs:

dc1node1 - node 1
dc1node2 - node 0  (locked up)

[oraprod at dc1node1 ocfs]$ cat /proc/ocfs/version
1.0.9-PROD12
[oraprod at dc1node1 ocfs]$ cat /proc/ocfs/10/nodenum
1


Jeremy Schneider
Database/Systems Administrator
The ASU Group - IS Dept
email: jer1887 at asugroup.com

>>> "Jeremy Schneider" <jer1887 at asugroup.com> 02/20/2004 5:32:25 PM
>>>
having a problem with ocfs.


device /dev/sdd mounted on 2 nodes, node 0 and node 1

tried to create file /u01/oracle/prod/proddata/temp01.dbf from node 1
(ALTER TABLESPACE TEMP ADD TEMPFILE...); this caused the oracle server
process to hang in a "D" state, apparently trying to create the file.
the file has not been created yet.  If I type "ls" from node 2 in
directory /u01/oracle/prod it lists the contents of the directory --
however if I type "ls" from node 2 in directory
/u01/oracle/prod/proddata it locks up in a "D" state.  From node 1 all
is well.

if i were to shutdown instance 1 of RAC and unmount the /u01 partition
on u01 all processes on node 2 would immediately resume.  (this has
happened before.)

Seems like an inode or datablock locking issue and/or race condition...

it's locked up right now; i don't really know what to look for but
here's some diagnostic info - can somebody make any sense of this?  i
can leave it locked up for a day or two, so if anyone would like to see
output from a debugocfs command before monday i can do that...  i might
be able to leave it locked up until Monday afternoon; we'll see how
long before the DB locks up and someone yells.  :)

this is a consistently recurring error at my site.  we can't put RAC
into production if it's going to hang due to some race condition when
i try to create a file.  how is it that no one else has seen this
problem?

Jeremy
Lansing, MI



[root at dc1node1 root]# uname -a
Linux dc1node1.intra.asugroup.com 2.4.9-e.27enterprise #1 SMP Tue Aug 5 15:39:21 EDT 2003 i686 unknown

[root at dc1node1 root]# rpm -qa|grep ocfs
ocfs-tools-1.0.9-12
ocfs-2.4.9-e-enterprise-1.0.9-12
ocfs-support-1.0.9-12

[root at dc1node1 root]# ls /u01/oracle/prod/proddata/
abmd01.dbf      bscd01.dbf       dddd01.dbf  hrd01.dbf   itgd01.dbf 
ontd01.dbf    rlad01.dbf
abmx01.dbf      bscx01.dbf       dddx01.dbf  hrid01.dbf  itgx01.dbf 
ontx01.dbf    rlax01.dbf
ahld01.dbf      cctd01.dbf       domd01.dbf  hrix01.dbf  jad01.dbf  
opid01.dbf    rlmd01.dbf
ahlx01.dbf      cctx01.dbf       domx01.dbf  hrx01.dbf   jax01.dbf  
opix01.dbf    rlmx01.dbf
ahmd01.dbf      ced01.dbf        eaad01.dbf  hxcd01.dbf  jed01.dbf  
osmd01.dbf    sspd01.dbf
ahmx01.dbf      cex01.dbf        eaax01.dbf  hxcx01.dbf  jex01.dbf  
osmx01.dbf    sspx01.dbf
akd01.dbf       clnd01.dbf       eamd01.dbf  hxtd01.dbf  jgd01.dbf  
otad01.dbf    stspk01.dbf
akx01.dbf       clnx01.dbf       eamx01.dbf  hxtx01.dbf  jgx01.dbf  
otax01.dbf    system01.dbf
alrd01.dbf      cnd01.dbf        ecd01.dbf   ibad01.dbf  jld01.dbf  
owad01.dbf    system02.dbf
alrx01.dbf      cntrl01.dbf      ecx01.dbf   ibax01.dbf  jlx01.dbf  
ozfd01.dbf    system03.dbf
amfd01.dbf      cntrl01.dbf.old  ecxd01.dbf  ibcd01.dbf  jtfd01.dbf 
ozfx01.dbf    system04.dbf
amfx01.dbf      cntrl02.dbf.old  ecxx01.dbf  ibcx01.dbf  jtfx01.dbf 
ozpd01.dbf    system05.dbf
amsd01.dbf      cntrl03.dbf.old  edrd01.dbf  ibed01.dbf  jtmd01.dbf 
ozpx01.dbf    system06.dbf
amsx01.dbf      cnx01.dbf        edrx01.dbf  ibex01.dbf  jtmx01.dbf 
ozsd01.dbf    system07.dbf
amvd01.dbf      crpd01.dbf       egod01.dbf  ibpd01.dbf  jtsd01.dbf 
ozsx01.dbf    system08.dbf
amvx01.dbf      crpx01.dbf       egox01.dbf  ibpx01.dbf  jtsx01.dbf 
pad01.dbf     system09.dbf
apd01.dbf       cscd01.dbf       engd01.dbf  ibud01.dbf  log01a.dbf 
pax01.dbf     system10.dbf
applsysd01.dbf  cscx01.dbf       engx01.dbf  ibux01.dbf  log01b.dbf 
pjid01.dbf    system11.dbf
applsysd02.dbf  csd01.dbf        enid01.dbf  ibyd01.dbf  log02a.dbf 
pjix01.dbf    undo01.dbf
applsysx01.dbf  csdd01.dbf       enix01.dbf  ibyx01.dbf  log02b.dbf 
pjmd01.dbf    undo02.dbf
applsysx02.dbf  csdx01.dbf       evmd01.dbf  icxd01.dbf  log03a.dbf 
pjmx01.dbf    vead01.dbf
apx01.dbf       csed01.dbf       evmx01.dbf  icxx01.dbf  log03b.dbf 
pmid01.dbf    veax01.dbf
ard01.dbf       csex01.dbf       fad01.dbf   iebd01.dbf  med01.dbf  
pmix01.dbf    vehd01.dbf
arx01.dbf       csfd01.dbf       fax01.dbf   iebx01.dbf  mex01.dbf  
pnd01.dbf     vehx01.dbf
asfd01.dbf      csfx01.dbf       femd01.dbf  iecd01.dbf  mfgd01.dbf 
pnx01.dbf     wipd01.dbf
asfx01.dbf      csid01.dbf       femx01.dbf  iecx01.dbf  mfgx01.dbf 
poad01.dbf    wipx01.dbf
asgd01.dbf      csix01.dbf       fiid01.dbf  iemd01.dbf  mrpd01.dbf 
poax01.dbf    wmsd01.dbf
asgx01.dbf      csld01.dbf       fiix01.dbf  iemx01.dbf  mrpx01.dbf 
pod01.dbf     wmsx01.dbf
asld01.dbf      cslx01.dbf       flmd01.dbf  ieod01.dbf  mscd01.dbf 
pomd01.dbf    wpsd01.dbf
aslx01.dbf      csmd01.dbf       flmx01.dbf  ieox01.dbf  mscx01.dbf 
pomx01.dbf    wpsx01.dbf
asod01.dbf      csmx01.dbf       fptd01.dbf  iesd01.dbf  msdd01.dbf 
pond01.dbf    wshd01.dbf
asox01.dbf      cspd01.dbf       fptx01.dbf  iesx01.dbf  msdx01.dbf 
ponx01.dbf    wshx01.dbf
aspd01.dbf      cspx01.dbf       frmd01.dbf  ieud01.dbf  msod01.dbf 
portal01.dbf  wsmd01.dbf
aspx01.dbf      csrd01.dbf       frmx01.dbf  ieux01.dbf  msox01.dbf 
posd01.dbf    wsmx01.dbf
astd01.dbf      csrx01.dbf       fted01.dbf  iexd01.dbf  msrd01.dbf 
posx01.dbf    xdpd01.dbf
astx01.dbf      cssd01.dbf       ftex01.dbf  iexx01.dbf  msrx01.dbf 
pox01.dbf     xdpx01.dbf
axd01.dbf       cssx01.dbf       fvd01.dbf   igcd01.dbf  mwad01.dbf 
prpd01.dbf    xlad01.dbf
axx01.dbf       csx01.dbf        fvx01.dbf   igcx01.dbf  mwax01.dbf 
prpx01.dbf    xlax01.dbf
azd01.dbf       ctxd01.dbf       gld01.dbf   igfd01.dbf  oed01.dbf  
psad01.dbf    xnbd01.dbf
azx01.dbf       cuad01.dbf       glx01.dbf   igfx01.dbf  oex01.dbf  
psax01.dbf    xnbx01.dbf
bend01.dbf      cuax01.dbf       gmad01.dbf  igid01.dbf  okbd01.dbf 
psbd01.dbf    xncd01.dbf
benx01.dbf      cued01.dbf       gmax01.dbf  igix01.dbf  okbx01.dbf 
psbx01.dbf    xncx01.dbf
bicd01.dbf      cuex01.dbf       gmdd01.dbf  igsd01.dbf  okcd01.dbf 
pspd01.dbf    xnid01.dbf
bicx01.dbf      cufd01.dbf       gmdx01.dbf  igsx01.dbf  okcx01.dbf 
pspx01.dbf    xnix01.dbf
bild01.dbf      cufx01.dbf       gmed01.dbf  igwd01.dbf  oked01.dbf 
pvd01.dbf     xnmd01.dbf
bilx01.dbf      cugd01.dbf       gmex01.dbf  igwx01.dbf  okex01.dbf 
pvx01.dbf     xnmx01.dbf
bimd01.dbf      cugx01.dbf       gmfd01.dbf  imcd01.dbf  okid01.dbf 
qad01.dbf     xnpd01.dbf
bimx01.dbf      cuid01.dbf       gmfx01.dbf  imcx01.dbf  okix01.dbf 
qax01.dbf     xnpx01.dbf
bisd01.dbf      cuix01.dbf       gmid01.dbf  imtd01.dbf  okld01.dbf 
qotd01.dbf    xnsd01.dbf
bisx01.dbf      cund01.dbf       gmix01.dbf  imtx01.dbf  oklx01.dbf 
qotx01.dbf    xnsx01.dbf
bivd01.dbf      cunx01.dbf       gmld01.dbf  invd01.dbf  okod01.dbf 
qpd01.dbf     xtrd01.dbf
bivx01.dbf      cupd01.dbf       gmlx01.dbf  invx01.dbf  okox01.dbf 
qpx01.dbf     xtrx01.dbf
bixd01.dbf      cupx01.dbf       gmpd01.dbf  ipad01.dbf  okrd01.dbf 
qrmd01.dbf    zfad01.dbf
bixx01.dbf      cusd01.dbf       gmpx01.dbf  ipax01.dbf  okrx01.dbf 
qrmx01.dbf    zfax01.dbf
bned01.dbf      cust01.dbf       gmsd01.dbf  ipdd01.dbf  oksd01.dbf 
rgd01.dbf     zsad01.dbf
bnex01.dbf      cusx01.dbf       gmsx01.dbf  ipdx01.dbf  oksx01.dbf 
rgx01.dbf     zsax01.dbf
bomd01.dbf      czd01.dbf        grd01.dbf   iscd01.dbf  okxd01.dbf 
rhxd01.dbf
bomx01.dbf      czx01.dbf        grx01.dbf   iscx01.dbf  okxx01.dbf 
rhxx01.dbf


(this same "ls" command will not execute on node 2)



[root at dc1node1 root]# debugocfs -f /oracle/prod/ /dev/sdd
fileinfo:
        Name = /oracle/prod/
        curr_master = 1
        file_lock = OCFS_DLM_NO_LOCK
        oin_node_map = 00000000000000000000000000000000
        seq_num = 0
        local_ext = false
        granularity = 0
        filename = prod
        filename_len = 4
        file_size = 131072
        alloc_size = 131072
        attribs = OCFS_ATTRIB_DIRECTORY
        prot_bits = S_IRUSR S_IWUSR S_IXUSR S_IRGRP S_IXGRP S_IROTH S_IXOTH
        uid = 510
        gid = 500
        create_time = Wed Jan 14 11:29:16 2004
        modify_time = Wed Jan 14 11:29:16 2004
        dir_node_ptr = 16179200
        this_sector = 16179712
        last_ext_ptr = 0
        sync_flags = OCFS_SYNC_FLAG_VALID
        link_cnt = 0
        next_del = -1
        next_free_ext = 0
        extent[0].file_off = 0
        extent[0].num_bytes = 0
        extent[0].disk_off = 29810688
        extent[1].file_off = 0
        extent[1].num_bytes = 0
        extent[1].disk_off = 0
        extent[2].file_off = 0
        extent[2].num_bytes = 0
        extent[2].disk_off = 0

[root at dc1node1 root]# debugocfs -f /oracle/prod/proddata/ /dev/sdd
fileinfo:
        Name = /oracle/prod/proddata/
        curr_master = 1
        file_lock = OCFS_DLM_ENABLE_CACHE_LOCK
        oin_node_map = 00000000000000000000000000000000
        seq_num = 0
        local_ext = false
        granularity = 0
        filename = proddata
        filename_len = 8
        file_size = 131072
        alloc_size = 131072
        attribs = OCFS_ATTRIB_DIRECTORY
        prot_bits = S_IRUSR S_IWUSR S_IXUSR S_IRGRP S_IXGRP S_IROTH S_IXOTH
        uid = 510
        gid = 500
        create_time = Wed Jan 14 11:29:16 2004
        modify_time = Wed Jan 14 11:29:16 2004
        dir_node_ptr = 29810688
        this_sector = 29811200
        last_ext_ptr = 0
        sync_flags = OCFS_SYNC_FLAG_MARK_FOR_DELETION
        link_cnt = 0
        next_del = -1
        next_free_ext = 0
        extent[0].file_off = 0
        extent[0].num_bytes = 0
        extent[0].disk_off = 29941760
        extent[1].file_off = 0
        extent[1].num_bytes = 0
        extent[1].disk_off = 0
        extent[2].file_off = 0
        extent[2].num_bytes = 0
        extent[2].disk_off = 0

[root at dc1node1 root]# debugocfs -d /oracle/prod/ /dev/sdd
dirinfo:
        Name = /oracle/prod/
        curr_master = 0
        file_lock = OCFS_DLM_NO_LOCK
        oin_node_map = 00000000000000000000000000000000
        seq_num = 0
        alloc_file_off = 0
        alloc_node = 1
        free_node_ptr = INVALID_NODE_POINTER
        node_disk_off = 29810688
        next_node_ptr = INVALID_NODE_POINTER
        indx_node_ptr = INVALID_NODE_POINTER
        next_del_ent_node = INVALID_NODE_POINTER
        head_del_ent_node = INVALID_NODE_POINTER
        first_del = 255
        num_del = 0
        num_ents = 254
        depth = 0
        num_ent_used = 5
        dir_node_flags = DIR_NODE_FLAG_ROOT
        sync_flags = OCFS_SYNC_FLAG_DELETED
        index_dirty = false
        bad_off = 4
        index =   0   2   1   3   4   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0

[root at dc1node1 root]# debugocfs -d /oracle/prod/proddata/ /dev/sdd
dirinfo:
        Name = /oracle/prod/proddata/
        curr_master = 0
        file_lock = OCFS_DLM_NO_LOCK
        oin_node_map = 00000000000000000000000000000000
        seq_num = 0
        alloc_file_off = 131072
        alloc_node = 1
        free_node_ptr = INVALID_NODE_POINTER
        node_disk_off = 29941760
        next_node_ptr = INVALID_NODE_POINTER
        indx_node_ptr = INVALID_NODE_POINTER
        next_del_ent_node = INVALID_NODE_POINTER
        head_del_ent_node = INVALID_NODE_POINTER
        first_del = 255
        num_del = 0
        num_ents = 254
        depth = 0
        num_ent_used = 0
        dir_node_flags = DIR_NODE_FLAG_ROOT
        sync_flags = OCFS_SYNC_FLAG_DELETED
        index_dirty = false
        bad_off = 0
        index =   0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0
                  0   0   0   0   0   0   0   0   0   0   0   0   0  
0
  0   0

[root at dc1node1 root]# debugocfs -v 0-1 /dev/sdd
vote0:
        seq_num = 1204
        dir_ent = 34122240
        open_handle = No
        Vote0 = (0x00000000)
        Vote1 = FLAG_VOTE_NODE (0x00000001)

vote1:
        seq_num = 1205
        dir_ent = 34343936
        open_handle = No
        Vote0 = FLAG_VOTE_NODE (0x00000001)
        Vote1 = (0x00000000)


[root at dc1node1 root]# debugocfs -p 0-1 /dev/sdd
publish0:
        time = 2508321.2554391394
        vote = No
        dirty = No
        vote_type = (0x00000000)
        vote_map = 00000000000000000000000000000000
        seq_num = 1205
        dir_ent = 0
        hbm = 20 0

publish1:
        time = 2508321.2555065604
        vote = No
        dirty = No
        vote_type = (0x00000000)
        vote_map = 00000000000000000000000000000000
        seq_num = 1204
        dir_ent = 0
        hbm = 0 20

[root at dc1node1 root]# debugocfs -l /dev/sdd | head -30
filelisting:
        File1 = /oracle/
        File2 = /oracle/ctrl/
        File3 = /oracle/ctrl/
        File4 = /oracle/ctrl/
        File5 = /oracle/ctrl/
        File6 = /oracle/prod/
        File7 = /oracle/prod/proddata/
        File8 = /oracle/prod/ctrltest/
        File9 = /oracle/prod/proddata/
        File10 = /oracle/prod/ctrldata/
        File11 = /oracle/prod/proddata/
        File12 = /oracle/prod/proddata/zsad01.dbf
        File13 = /oracle/prod/proddata/zfax01.dbf
        File14 = /oracle/prod/proddata/zfad01.dbf
        File15 = /oracle/prod/proddata/xtrd01.dbf
        File16 = /oracle/prod/proddata/xnpx01.dbf
        File17 = /oracle/prod/proddata/xnpd01.dbf
        File18 = /oracle/prod/proddata/xnmx01.dbf
        File19 = /oracle/prod/proddata/xnmd01.dbf
        File20 = /oracle/prod/proddata/xnid01.dbf

...

        File406 = /oracle/prod/proddata/axx01.dbf
        File407 = /oracle/prod/proddata/astd01.dbf
        File408 = /oracle/prod/proddata/aslx01.dbf
        File409 = /oracle/prod/proddata/asfx01.dbf
        File410 = /oracle/prod/proddata/ard01.dbf
        File411 = /oracle/prod/proddata/apx01.dbf
        File412 = /oracle/prod/proddata/applsysd01.dbf
        File413 = /oracle/prod/proddata/amvd01.dbf
        File414 = /oracle/prod/proddata/alrx01.dbf
        File415 = /oracle/prod/proddata/ahmd01.dbf
        File416 = /oracle/ctrl/
        File417 = /backup


Jeremy Schneider
Database/Systems Administrator
The ASU Group - IS Dept
email: jer1887 at asugroup.com 

_______________________________________________
Ocfs-users mailing list
Ocfs-users at oss.oracle.com 
http://oss.oracle.com/mailman/listinfo/ocfs-users


More information about the Ocfs-users mailing list