[Ocfs2-commits] jlbec commits r1812 - in trunk: . fs/ocfs2
fs/ocfs2/cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Jan 20 17:16:59 CST 2005
Author: jlbec
Date: 2005-01-20 17:16:58 -0600 (Thu, 20 Jan 2005)
New Revision: 1812
Added:
trunk/fs/ocfs2/cluster/
trunk/fs/ocfs2/cluster/Makefile
trunk/fs/ocfs2/cluster/compat_libfs.c
trunk/fs/ocfs2/cluster/compat_libfs.h
trunk/fs/ocfs2/cluster/dlm_compat.h
trunk/fs/ocfs2/cluster/dlmast.c
trunk/fs/ocfs2/cluster/dlmcommon.h
trunk/fs/ocfs2/cluster/dlmconvert.c
trunk/fs/ocfs2/cluster/dlmlock.c
trunk/fs/ocfs2/cluster/dlmmaster.c
trunk/fs/ocfs2/cluster/dlmmod.c
trunk/fs/ocfs2/cluster/dlmmod.h
trunk/fs/ocfs2/cluster/dlmrecovery.c
trunk/fs/ocfs2/cluster/dlmthread.c
trunk/fs/ocfs2/cluster/dlmunlock.c
trunk/fs/ocfs2/cluster/heartbeat.c
trunk/fs/ocfs2/cluster/heartbeat.h
trunk/fs/ocfs2/cluster/nodemanager.c
trunk/fs/ocfs2/cluster/nodemanager.h
trunk/fs/ocfs2/cluster/ocfs2_heartbeat.h
trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h
trunk/fs/ocfs2/cluster/ocfs2_tcp.h
trunk/fs/ocfs2/cluster/tcp.c
trunk/fs/ocfs2/cluster/tcp.h
trunk/fs/ocfs2/cluster/util.c
trunk/fs/ocfs2/cluster/util.h
trunk/fs/ocfs2/cluster/warning_hack.h
Removed:
trunk/cluster/
trunk/fs/ocfs2/cluster/Makefile
trunk/fs/ocfs2/cluster/compat_libfs.c
trunk/fs/ocfs2/cluster/compat_libfs.h
trunk/fs/ocfs2/cluster/dlm_compat.h
trunk/fs/ocfs2/cluster/dlmast.c
trunk/fs/ocfs2/cluster/dlmcommon.h
trunk/fs/ocfs2/cluster/dlmconvert.c
trunk/fs/ocfs2/cluster/dlmlock.c
trunk/fs/ocfs2/cluster/dlmmaster.c
trunk/fs/ocfs2/cluster/dlmmod.c
trunk/fs/ocfs2/cluster/dlmmod.h
trunk/fs/ocfs2/cluster/dlmrecovery.c
trunk/fs/ocfs2/cluster/dlmthread.c
trunk/fs/ocfs2/cluster/dlmunlock.c
trunk/fs/ocfs2/cluster/heartbeat.c
trunk/fs/ocfs2/cluster/heartbeat.h
trunk/fs/ocfs2/cluster/nodemanager.c
trunk/fs/ocfs2/cluster/nodemanager.h
trunk/fs/ocfs2/cluster/ocfs2_heartbeat.h
trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h
trunk/fs/ocfs2/cluster/ocfs2_tcp.h
trunk/fs/ocfs2/cluster/tcp.c
trunk/fs/ocfs2/cluster/tcp.h
trunk/fs/ocfs2/cluster/util.c
trunk/fs/ocfs2/cluster/util.h
trunk/fs/ocfs2/cluster/warning_hack.h
Log:
Step 5 - Move cluster into place.
o Move cluster/ to fs/ocfs2/cluster/, where it is a proper child of
OCFS2.
Copied: trunk/fs/ocfs2/cluster (from rev 1810, trunk/cluster)
Deleted: trunk/fs/ocfs2/cluster/Makefile
===================================================================
--- trunk/cluster/Makefile 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/Makefile 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,111 +0,0 @@
-
-ifeq ($(KERNELRELEASE),)
-TOPDIR = ..
-
-include $(TOPDIR)/Preamble.make
-else
-# We are included by kbuild.
-
-OUR_SRC_DIR := $(M)
-
-include $(OUR_SRC_DIR)/../Config.make
-
-endif
-
-ifdef HAVE_NPTL
-MODULE_DEFINES += -DHAVE_NPTL
-endif
-
-ifdef HAVE_SOCK_CREATE_LITE
-MODULE_DEFINES += -DHAVE_SOCK_CREATE_LITE
-endif
-
-EXTRA_CFLAGS += $(MODULE_DEFINES)
-
-ifeq ($(KERNELRELEASE),)
-ifeq ($(KERNEL_26),)
-COMPAT_LIBFS := compat_libfs.o
-endif
-endif
-
-obj-m := ocfs2_dlm.o ocfs2_heartbeat.o ocfs2_nodemanager.o ocfs2_tcp.o
-
-ocfs2_dlm-objs := dlmmod.o dlmthread.o dlmrecovery.o util.o \
- dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o \
- $(COMPAT_LIBFS)
-
-ocfs2_nodemanager-objs := nodemanager.o util.o $(COMPAT_LIBFS)
-
-ocfs2_heartbeat-objs := heartbeat.o util.o $(COMPAT_LIBFS)
-
-ocfs2_tcp-objs := tcp.o util.o $(COMPAT_LIBFS)
-
-
-ifeq ($(KERNELRELEASE),)
-#
-# Called from a regular "make".
-#
-
-SOURCES = \
- compat_libfs.c \
- dlmmaster.c \
- dlmmod.c \
- dlmast.c \
- dlmconvert.c \
- dlmlock.c \
- dlmunlock.c \
- dlmrecovery.c \
- dlmthread.c \
- heartbeat.c \
- nodemanager.c \
- tcp.c \
- util.c
-
-HEADERS = \
- compat_libfs.h \
- dlm_compat.h \
- dlmcommon.h \
- dlmmod.h \
- heartbeat.h \
- nodemanager.h \
- tcp.h \
- util.h \
- warning_hack.h
-
-DIST_FILES = $(SOURCES) $(HEADERS)
-
-ALL_RULES = build-modules
-
-CLEAN_RULES = clean-modules
-
-INSTALL_RULES = install-modules
-
-
-ifneq ($(KERNEL_26),)
-#
-# This is a 2.6 kernel. Just forward to kbuild.
-#
-
-build-modules:
- $(MAKE) -C $(KERNELDIR) M=$(CURDIR) modules
-
-install-modules:
- $(MAKE) -C $(KERNELDIR) M=$(CURDIR) modules_install
-
-clean-modules:
- $(MAKE) -C $(KERNELDIR) M=$(CURDIR) clean
-else
-#
-# This is a 2.4 kernel. Because Red Hat's source tree is whack, do
-# everything by hand.
-#
-
-INSTALL_DIR = ocfs2
-
-include $(TOPDIR)/Kbuild-24.make
-
-endif
-
-include $(TOPDIR)/Postamble.make
-
-endif
Copied: trunk/fs/ocfs2/cluster/Makefile (from rev 1811, trunk/cluster/Makefile)
Deleted: trunk/fs/ocfs2/cluster/compat_libfs.c
===================================================================
--- trunk/cluster/compat_libfs.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/compat_libfs.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,708 +0,0 @@
-/* -----------------------------------------------------------------*/
-
-
-/*
- * compat_libfs.c
- * Library for filesystems writers.
- * PLUS... transaction file stuff stolen from nfsd
- */
-
-#include "warning_hack.h"
-
-#include <linux/module.h>
-#include <linux/pagemap.h>
-#include <linux/mount.h>
-#include <linux/vfs.h>
-#include <asm/uaccess.h>
-#include <linux/slab.h>
-
-#include "compat_libfs.h"
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#define kstatfs statfs
-#endif
-
-#define __user
-
-
-int simple_statfs(struct super_block *sb, struct kstatfs *buf);
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
-#else
-struct dentry *simple_lookup(struct inode *dir,struct dentry *dentry);
-#endif
-
-int simple_sync_file(struct file *file, struct dentry *dentry, int datasync);
-int dcache_dir_open(struct inode *inode, struct file *file);
-int dcache_dir_close(struct inode *inode, struct file *file);
-loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin);
-ssize_t generic_read_dir(struct file *filp, char *buf, size_t siz, loff_t *ppos);
-int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry);
-int simple_empty(struct dentry *dentry);
-int simple_unlink(struct inode *dir, struct dentry *dentry);
-int simple_rmdir(struct inode *dir, struct dentry *dentry);
-int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry);
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files);
-
-
-
-#if 0
-int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- generic_fillattr(inode, stat);
- stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
- return 0;
-}
-#endif
-
-int simple_statfs(struct super_block *sb, struct kstatfs *buf)
-{
- buf->f_type = sb->s_magic;
- buf->f_bsize = PAGE_CACHE_SIZE;
- buf->f_namelen = NAME_MAX;
- return 0;
-}
-
-/*
- * Lookup the data. This is trivial - if the dentry didn't already
- * exist, we know it is negative.
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
-}
-#else
-struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry)
-{
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
-}
-#endif
-
-
-struct dentry * simple_find_child(struct dentry *dentry, struct qstr *name)
-{
- struct list_head *iter;
- struct dentry *child = NULL;
-
- spin_lock(&dcache_lock);
- list_for_each(iter, &dentry->d_subdirs) {
- child = list_entry(iter, struct dentry, d_child);
- if (child->d_name.len == name->len &&
- memcmp(child->d_name.name, name->name, name->len)==0)
- break;
- child = NULL;
- }
- if (child)
- dget_locked(child);
- spin_unlock(&dcache_lock);
- return child;
-}
-
-
-
-int simple_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
- return 0;
-}
-
-int dcache_dir_open(struct inode *inode, struct file *file)
-{
- static struct qstr cursor_name = {.len = 1, .name = "."};
-
- file->private_data = d_alloc(file->f_dentry, &cursor_name);
-
- return file->private_data ? 0 : -ENOMEM;
-}
-
-int dcache_dir_close(struct inode *inode, struct file *file)
-{
- dput(file->private_data);
- return 0;
-}
-
-loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
-{
- down(&file->f_dentry->d_inode->i_sem);
- switch (origin) {
- case 1:
- offset += file->f_pos;
- case 0:
- if (offset >= 0)
- break;
- default:
- up(&file->f_dentry->d_inode->i_sem);
- return -EINVAL;
- }
- if (offset != file->f_pos) {
- file->f_pos = offset;
- if (file->f_pos >= 2) {
- struct list_head *p;
- struct dentry *cursor = file->private_data;
- loff_t n = file->f_pos - 2;
-
- spin_lock(&dcache_lock);
- list_del(&cursor->d_child);
- p = file->f_dentry->d_subdirs.next;
- while (n && p != &file->f_dentry->d_subdirs) {
- struct dentry *next;
- next = list_entry(p, struct dentry, d_child);
- if (!d_unhashed(next) && next->d_inode)
- n--;
- p = p->next;
- }
- list_add_tail(&cursor->d_child, p);
- spin_unlock(&dcache_lock);
- }
- }
- up(&file->f_dentry->d_inode->i_sem);
- return offset;
-}
-
-/* Relationship between i_mode and the DT_xxx types */
-static inline unsigned char dt_type(struct inode *inode)
-{
- return (inode->i_mode >> 12) & 15;
-}
-
-/*
- * Directory is locked and all positive dentries in it are safe, since
- * for ramfs-type trees they can't go away without unlink() or rmdir(),
- * both impossible due to the lock on directory.
- */
-
-int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
-{
- struct dentry *dentry = filp->f_dentry;
- struct dentry *cursor = filp->private_data;
- struct list_head *p, *q = &cursor->d_child;
- ino_t ino;
- int i = filp->f_pos;
-
- switch (i) {
- case 0:
- ino = dentry->d_inode->i_ino;
- if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
- break;
- filp->f_pos++;
- i++;
- /* fallthrough */
- case 1:
- ino = dentry->d_parent->d_inode->i_ino;
- if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
- break;
- filp->f_pos++;
- i++;
- /* fallthrough */
- default:
- spin_lock(&dcache_lock);
- if (filp->f_pos == 2) {
- list_del(q);
- list_add(q, &dentry->d_subdirs);
- }
- for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
- struct dentry *next;
- next = list_entry(p, struct dentry, d_child);
- if (d_unhashed(next) || !next->d_inode)
- continue;
-
- spin_unlock(&dcache_lock);
- if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0)
- return 0;
- spin_lock(&dcache_lock);
- /* next is still alive */
- list_del(q);
- list_add(q, p);
- p = q;
- filp->f_pos++;
- }
- spin_unlock(&dcache_lock);
- }
- return 0;
-}
-
-ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
-{
- return -EISDIR;
-}
-
-struct file_operations simple_dir_operations = {
- .open = dcache_dir_open,
- .release = dcache_dir_close,
- .llseek = dcache_dir_lseek,
- .read = generic_read_dir,
- .readdir = dcache_readdir,
-};
-
-struct inode_operations simple_dir_inode_operations = {
- .lookup = simple_lookup,
-};
-
-#if 0
-/*
- * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
- * will never be mountable)
- */
-struct super_block *
-get_sb_pseudo(struct file_system_type *fs_type, char *name,
- struct super_operations *ops, unsigned long magic)
-{
- struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
- static struct super_operations default_ops = {.statfs = simple_statfs};
- struct dentry *dentry;
- struct inode *root;
- struct qstr d_name = {.name = name, .len = strlen(name)};
-
- if (IS_ERR(s))
- return s;
-
- s->s_flags = MS_NOUSER;
- s->s_maxbytes = ~0ULL;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
- s->s_magic = magic;
- s->s_op = ops ? ops : &default_ops;
- root = new_inode(s);
- if (!root)
- goto Enomem;
- root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
- root->i_uid = root->i_gid = 0;
- root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
- dentry = d_alloc(NULL, &d_name);
- if (!dentry) {
- iput(root);
- goto Enomem;
- }
- dentry->d_sb = s;
- dentry->d_parent = dentry;
- d_instantiate(dentry, root);
- s->s_root = dentry;
- s->s_flags |= MS_ACTIVE;
- return s;
-
-Enomem:
- up_write(&s->s_umount);
- deactivate_super(s);
- return ERR_PTR(-ENOMEM);
-}
-#endif
-
-int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = old_dentry->d_inode;
-
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- inode->i_nlink++;
- atomic_inc(&inode->i_count);
- dget(dentry);
- d_instantiate(dentry, inode);
- return 0;
-}
-
-static inline int simple_positive(struct dentry *dentry)
-{
- return dentry->d_inode && !d_unhashed(dentry);
-}
-
-int simple_empty(struct dentry *dentry)
-{
- struct dentry *child;
- int ret = 0;
-
- spin_lock(&dcache_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_child)
- if (simple_positive(child))
- goto out;
- ret = 1;
-out:
- spin_unlock(&dcache_lock);
- return ret;
-}
-
-int simple_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
-
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- inode->i_nlink--;
- dput(dentry);
- return 0;
-}
-
-int simple_rmdir(struct inode *dir, struct dentry *dentry)
-{
- if (!simple_empty(dentry))
- return -ENOTEMPTY;
-
- dentry->d_inode->i_nlink--;
- simple_unlink(dir, dentry);
- dir->i_nlink--;
- return 0;
-}
-
-int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- struct inode *inode = old_dentry->d_inode;
- int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode);
-
- if (!simple_empty(new_dentry))
- return -ENOTEMPTY;
-
- if (new_dentry->d_inode) {
- simple_unlink(new_dir, new_dentry);
- if (they_are_dirs)
- old_dir->i_nlink--;
- } else if (they_are_dirs) {
- old_dir->i_nlink--;
- new_dir->i_nlink++;
- }
-
- old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
- new_dir->i_mtime = inode->i_ctime = CURRENT_TIME;
-
- return 0;
-}
-
-#if 0
-int simple_readpage(struct file *file, struct page *page)
-{
- void *kaddr;
-
- if (PageUptodate(page))
- goto out;
-
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page);
- SetPageUptodate(page);
-out:
- unlock_page(page);
- return 0;
-}
-
-int simple_prepare_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- if (!PageUptodate(page)) {
- if (to - from != PAGE_CACHE_SIZE) {
- void *kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, from);
- memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
- SetPageUptodate(page);
- }
- return 0;
-}
-
-int simple_commit_write(struct file *file, struct page *page,
- unsigned offset, unsigned to)
-{
- struct inode *inode = page->mapping->host;
- loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
- /*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hold the i_sem.
- */
- if (pos > inode->i_size)
- i_size_write(inode, pos);
- set_page_dirty(page);
- return 0;
-}
-#endif
-
-void d_genocide(struct dentry *root);
-
-void d_genocide(struct dentry *root)
-{
- struct dentry *this_parent = root;
- struct list_head *next;
- spin_lock(&dcache_lock);
-repeat:
- next = this_parent->d_subdirs.next;
-resume:
- while (next != &this_parent->d_subdirs) {
- struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
- next = tmp->next;
- if (d_unhashed(dentry)||!dentry->d_inode)
- continue;
- if (!list_empty(&dentry->d_subdirs)) {
- this_parent = dentry;
- goto repeat;
- }
- atomic_dec(&dentry->d_count);
- }
- if (this_parent != root) {
- next = this_parent->d_child.next;
- atomic_dec(&this_parent->d_count);
- this_parent = this_parent->d_parent;
- goto resume;
- }
- spin_unlock(&dcache_lock);
-}
-
-static void simple_read_inode(struct inode * inode)
-{
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-}
-
-
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
-{
- static struct super_operations s_ops = {
- .statfs = simple_statfs,
- .read_inode = simple_read_inode
- };
- struct inode *inode;
- struct dentry *root;
- struct dentry *dentry;
- int i;
-
- s->s_blocksize = PAGE_CACHE_SIZE;
- s->s_blocksize_bits = PAGE_CACHE_SHIFT;
- s->s_magic = magic;
- s->s_op = &s_ops;
-
- inode = new_inode(s);
- if (!inode)
- return -ENOMEM;
- inode->i_mode = S_IFDIR | 0755;
- inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
- inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- root = d_alloc_root(inode);
- if (!root) {
- iput(inode);
- return -ENOMEM;
- }
- for (i = 0; !files->name || files->name[0]; i++, files++) {
- struct qstr name;
- if (!files->name)
- continue;
- name.name = files->name;
- name.len = strlen(name.name);
- printk("adding file %*s\n", name.len, name.name);
- name.hash = full_name_hash(name.name, name.len);
- dentry = d_alloc(root, &name);
- if (!dentry)
- goto out;
- inode = new_inode(s);
- if (!inode)
- goto out;
- inode->i_mode = S_IFREG | files->mode;
- inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
- inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_fop = files->ops;
- inode->i_ino = i;
- d_add(dentry, inode);
- }
- s->s_root = root;
- return 0;
-out:
- d_genocide(root);
- dput(root);
- return -ENOMEM;
-}
-
-#if 0
-static spinlock_t pin_fs_lock = SPIN_LOCK_UNLOCKED;
-
-int simple_pin_fs(char *name, struct vfsmount **mount, int *count)
-{
- struct vfsmount *mnt = NULL;
- spin_lock(&pin_fs_lock);
- if (unlikely(!*mount)) {
- spin_unlock(&pin_fs_lock);
- mnt = do_kern_mount(name, 0, name, NULL);
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
- spin_lock(&pin_fs_lock);
- if (!*mount)
- *mount = mnt;
- }
- mntget(*mount);
- ++*count;
- spin_unlock(&pin_fs_lock);
- mntput(mnt);
- return 0;
-}
-
-void simple_release_fs(struct vfsmount **mount, int *count)
-{
- struct vfsmount *mnt;
- spin_lock(&pin_fs_lock);
- mnt = *mount;
- if (!--*count)
- *mount = NULL;
- spin_unlock(&pin_fs_lock);
- mntput(mnt);
-}
-
-ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
- const void *from, size_t available)
-{
- loff_t pos = *ppos;
- if (pos < 0)
- return -EINVAL;
- if (pos >= available)
- return 0;
- if (count > available - pos)
- count = available - pos;
- if (copy_to_user(to, from + pos, count))
- return -EFAULT;
- *ppos = pos + count;
- return count;
-}
-
-EXPORT_SYMBOL(dcache_dir_close);
-EXPORT_SYMBOL(dcache_dir_lseek);
-EXPORT_SYMBOL(dcache_dir_open);
-EXPORT_SYMBOL(dcache_readdir);
-EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(simple_commit_write);
-EXPORT_SYMBOL(simple_empty);
-EXPORT_SYMBOL(simple_fill_super);
-EXPORT_SYMBOL(simple_getattr);
-EXPORT_SYMBOL(simple_link);
-EXPORT_SYMBOL(simple_lookup);
-EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_SYMBOL(simple_prepare_write);
-EXPORT_SYMBOL(simple_readpage);
-EXPORT_SYMBOL(simple_release_fs);
-EXPORT_SYMBOL(simple_rename);
-EXPORT_SYMBOL(simple_rmdir);
-EXPORT_SYMBOL(simple_statfs);
-EXPORT_SYMBOL(simple_sync_file);
-EXPORT_SYMBOL(simple_unlink);
-EXPORT_SYMBOL(simple_read_from_buffer);
-EXPORT_SYMBOL(get_sb_pseudo);
-#endif
-
-/* -----------------------------------------------------------------*/
-
-
-
-/* transaction file support */
-
-/*
- * transaction based IO methods.
- * The file expects a single write which triggers the transaction, and then
- * possibly a read which collects the result - which is stored in a
- * file-local buffer.
- */
-static ssize_t TA_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
-{
- ino_t ino = file->f_dentry->d_inode->i_ino;
- struct argresp *ar;
- ssize_t rv = 0;
- struct super_block *sb = file->f_dentry->d_inode->i_sb;
- TA_write_ops *ops = TA_GENERIC_SB_MEMBER(sb);
- TA_write_op *write_op;
-
- printk("welcome to TA_write: num_ops=%d, op[%d]=%p, private=%p, size=%lld\n",
- ops->num_ops, (int)ino, ops->write_op[ino], file->private_data, (long long)size);
- if (ino >= ops->num_ops || ops->write_op[ino] == NULL)
- return -EINVAL;
- write_op = ops->write_op[ino];
- if (file->private_data)
- return -EINVAL; /* only one write allowed per open */
- if (size > PAGE_SIZE - sizeof(struct argresp))
- return -EFBIG;
-
- ar = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!ar)
- return -ENOMEM;
- ar->size = 0;
- down(&file->f_dentry->d_inode->i_sem);
- if (file->private_data)
- rv = -EINVAL;
- else
- file->private_data = ar;
- up(&file->f_dentry->d_inode->i_sem);
- if (rv) {
- kfree(ar);
- return rv;
- }
- if (copy_from_user(ar->data, buf, size))
- return -EFAULT;
-
- printk("now calling write_op...\n");
- rv = write_op(file, ar->data, size);
- printk("write_op returned %lld\n", (long long)rv);
- if (rv>0) {
- ar->size = rv;
- rv = size;
- }
- return rv;
-}
-
-
-static ssize_t TA_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
-{
- struct argresp *ar;
- ssize_t rv = 0;
-
- if (file->private_data == NULL)
- rv = TA_write(file, buf, 0, pos);
- if (rv < 0)
- return rv;
-
- ar = file->private_data;
- if (!ar)
- return 0;
- if (*pos >= ar->size)
- return 0;
- if (*pos + size > ar->size)
- size = ar->size - *pos;
- if (copy_to_user(buf, ar->data + *pos, size))
- return -EFAULT;
- *pos += size;
- return size;
-}
-
-static int TA_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return 0;
-}
-
-static int TA_release(struct inode *inode, struct file *file)
-{
- void *p = file->private_data;
- file->private_data = NULL;
- kfree(p);
- return 0;
-}
-
-
-
-
-
-
-
-
-struct file_operations transaction_ops = {
- .write = TA_write,
- .read = TA_read,
- .open = TA_open,
- .release = TA_release,
-};
Copied: trunk/fs/ocfs2/cluster/compat_libfs.c (from rev 1811, trunk/cluster/compat_libfs.c)
Deleted: trunk/fs/ocfs2/cluster/compat_libfs.h
===================================================================
--- trunk/cluster/compat_libfs.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/compat_libfs.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,40 +0,0 @@
-#ifndef CLUSTER_COMPAT_LIBFS_H
-#define CLUSTER_COMPAT_LIBFS_H
-
-#include <linux/version.h>
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define TA_GENERIC_SB_MEMBER(sb) ((sb)->s_fs_info)
-#else
-#define TA_GENERIC_SB_MEMBER(sb) ((sb)->u.generic_sbp)
-#endif
-
-
-/* an argresp is stored in an allocated page and holds the
- * size of the argument or response, along with its content
- */
-struct argresp {
- ssize_t size;
- char data[0];
-};
-
-typedef ssize_t (TA_write_op)(struct file *, char *, size_t);
-typedef struct _TA_write_ops
-{
- int num_ops;
- TA_write_op *write_op[0];
-} TA_write_ops;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-struct tree_descr
-{
- char *name;
- struct file_operations *ops;
- int mode;
-};
-#endif
-
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files);
-struct dentry * simple_find_child(struct dentry *dentry, struct qstr *name);
-
-#endif /* CLUSTER_COMPAT_LIBFS_H */
Copied: trunk/fs/ocfs2/cluster/compat_libfs.h (from rev 1811, trunk/cluster/compat_libfs.h)
Deleted: trunk/fs/ocfs2/cluster/dlm_compat.h
===================================================================
--- trunk/cluster/dlm_compat.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlm_compat.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,189 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlm_compat.h
- *
- * Compatibility stuff for 2.4
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation, version
- * 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_DLM_COMPAT_H
-#define CLUSTER_DLM_COMPAT_H
-
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/kdev_t.h>
-#include <linux/sched.h>
-#include <linux/compiler.h>
-
-/* for tcp_sk() */
-#include <net/sock.h>
-#include <linux/tcp.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-# include <linux/locks.h>
-# include <linux/blkdev.h>
-#else
-# include <linux/buffer_head.h>
-#endif
-
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
-extern inline int generic_fls(int x);
-extern inline int get_bitmask_order(unsigned int count);
-/* XXX Hack to avoid warning */
-struct mem_dqinfo;
-
-#define flush_scheduled_work flush_scheduled_tasks
-#define work_struct tq_struct
-#define INIT_WORK(w, f, d) INIT_TQUEUE(w, f, d)
-#define schedule_work(w) schedule_task(w)
-
-#ifdef HAVE_NPTL
-static inline void dequeue_signal_lock(struct task_struct *task,
- sigset_t *blocked, siginfo_t *info)
-{
- spin_lock_irq(&task->sighand->siglock);
- dequeue_signal(blocked, info);
- spin_unlock_irq(&task->sighand->siglock);
-}
-#else
-static inline void dequeue_signal_lock(struct task_struct *task,
- sigset_t *blocked, siginfo_t *info)
-{
- spin_lock_irq(&task->sigmask_lock);
- dequeue_signal(blocked, info);
- spin_unlock_irq(&task->sigmask_lock);
-}
-#endif
-#define kstatfs statfs
-
-
-
-/*
- * Copied right out of the 2.6.2 kernel's buffer_head.h:
- * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
- * and buffer_foo() functions.
- */
-#define BUFFER_FNS(bit, name) \
-static inline void set_buffer_##name(struct buffer_head *bh) \
-{ \
- set_bit(BH_##bit, &(bh)->b_state); \
-} \
-static inline void clear_buffer_##name(struct buffer_head *bh) \
-{ \
- clear_bit(BH_##bit, &(bh)->b_state); \
-} \
-static inline int buffer_##name(struct buffer_head *bh) \
-{ \
- return test_bit(BH_##bit, &(bh)->b_state); \
-}
-
-#undef buffer_uptodate
-#undef buffer_dirty
-BUFFER_FNS(Uptodate, uptodate)
-BUFFER_FNS(Dirty, dirty)
-
-#define clear_buffer_dirty mark_buffer_clean
-
-#define OCFS_CURRENT_SECONDS CURRENT_TIME
-
-static inline __u32 mk_inode_time(long tv_sec, long tv_nsec)
-{
- return tv_sec;
-}
-
-#ifndef tcp_sk
-static inline struct tcp_opt * tcp_sk(const struct sock *__sk)
-{
- return &__sk->tp_info.af_tcp;
-}
-#endif
-static inline int ocfs_dev_bits(kdev_t dev)
-{
- return blksize_bits(block_size(dev));
-}
-
-#define blk_run_address_space(throwaway) run_task_queue(&tq_disk)
-
-#else /* LINUX_VERSION_CODE < 2.6 */
-
-static inline int ocfs_dev_bits(dev_t dev)
-{
- struct block_device *bd = bdget(dev);
- int ret = -EINVAL;
-
- if (bd == NULL)
- goto out;
-
- ret = bd->bd_inode->i_blkbits;
- bdput(bd);
-out:
- return ret;
-
-}
-
-static inline struct buffer_head *getblk(dev_t dev, sector_t block, int size)
-{
- struct block_device *bd = bdget(dev);
- struct buffer_head *bh;
-
- if (bd == NULL)
- return NULL;
-
- bh = __getblk(bd, block, size);
- bdput(bd);
- return bh;
-}
-
-#define OCFS_CURRENT_SECONDS (CURRENT_TIME.tv_sec)
-
-static inline struct timespec mk_inode_time(long tv_sec, long tv_nsec)
-{
- struct timespec ts = {
- .tv_sec = tv_sec,
- .tv_nsec = tv_nsec,
- };
- return ts;
-}
-
-#endif /* LINUX_VERSION_CODE */
-
-#ifndef HAVE_SOCK_CREATE_LITE
-static inline int sock_create_lite(int family, int type, int protocol,
- struct socket **res)
-{
- struct socket *sock = sock_alloc();
- int ret = 0;
-
- if (sock == NULL)
- ret = -ENOMEM;
-
- *res = sock;
-
- return ret;
-}
-#endif /* HAVE_SOCK_CREATE_LITE */
-
-
-#endif /* CLUSTER_DLM_COMPAT_H */
-
Copied: trunk/fs/ocfs2/cluster/dlm_compat.h (from rev 1811, trunk/cluster/dlm_compat.h)
Deleted: trunk/fs/ocfs2/cluster/dlmast.c
===================================================================
--- trunk/cluster/dlmast.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmast.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,311 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmast.c
- *
- * AST and BAST functionality for local and remote nodes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-static int dlm_send_proxy_ast(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int type, int blocked_type);
-
-int dlm_do_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock)
-{
- int ret;
-
- dlm_astlockfunc_t *fn;
- dlm_lockstatus *lksb;
-
- dlmprintk0("\n");
-
- DLM_ASSERT(lock);
- DLM_ASSERT(res);
- DLM_ASSERT(lock->lksb);
-
- lksb = lock->lksb;
- fn = lock->ast;
-
- if (res->owner == dlm->group_index) {
- /* this node is the lockres master */
- if (lksb->flags & DLM_LKSB_GET_LVB) {
- dlmprintk("getting lvb from lockres for %s node\n",
- lock->node == dlm->group_index ? "master" :
- "remote");
- memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
- } else if (lksb->flags & DLM_LKSB_PUT_LVB) {
- dlmprintk("setting lvb from lockres for %s node\n",
- lock->node == dlm->group_index ? "master" :
- "remote");
- memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
- }
- }
-
- ret = 0;
- if (lock->node != dlm->group_index) {
- /* lock request came from another node
- * go do the ast over there */
- ret = dlm_send_proxy_ast(dlm, res, lock, DLM_AST, 0);
- } else {
- DLM_ASSERT(fn);
- (*fn)(lock->astdata);
- }
-
- /* reset any lvb flags on the lksb */
- lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
- return ret;
-}
-
-
-int dlm_do_bast(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int blocked_type)
-{
- int ret;
- dlm_bastlockfunc_t *fn = lock->bast;
-
- dlmprintk0("\n");
-
- if (lock->node != dlm->group_index) {
- ret = dlm_send_proxy_ast(dlm, res, lock,
- DLM_BAST, blocked_type);
- goto leave;
- }
-
- if (!fn) {
- dlmprintk("eek! lock has no bast %*s! cookie=%llu\n",
- res->lockname.len, res->lockname.name, lock->cookie);
- ret = -EINVAL;
- goto leave;
- }
- (*fn)(lock->astdata, blocked_type);
- ret = 0;
-leave:
- return ret;
-}
-
-
-int dlm_proxy_ast_handler(net_msg *msg, u32 len, void *data)
-{
- int ret;
- int status;
- dlm_ctxt *dlm = data;
- dlm_lock_resource *res;
- dlm_lock *lock = NULL;
- dlm_proxy_ast *past = (dlm_proxy_ast *) msg->buf;
- struct qstr lockname;
- struct list_head *iter, *head=NULL;
- u64 cookie;
- u32 flags;
-
- dlm_proxy_ast_to_host(past);
- lockname.name = past->name;
- lockname.len = past->namelen;
- cookie = past->cookie;
- flags = past->flags;
-
- if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
- (LKM_PUT_LVB|LKM_GET_LVB)) {
- dlmprintk("both PUT and GET lvb specified\n");
- ret = DLM_BADARGS;
- goto leave;
- }
-
- dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
- (flags & LKM_GET_LVB ? "get lvb" : "none"));
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- dlmprintk("type=%d, blocked_type=%d\n", past->type, past->blocked_type);
-
- if (past->type != DLM_AST &&
- past->type != DLM_BAST) {
- dlmprintk("Eeeek unknown ast type! %d, cookie=%llu, "
- "name=%*s\n",
- past->type, cookie, lockname.len, lockname.name);
- ret = DLM_IVLOCKID;
- goto leave;
- }
-
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res) {
- dlmprintk("eek! got %sast for unknown lockres! cookie=%llu, "
- "name=%*s, namelen=%d\n",
- past->type == DLM_AST ? "" : "b",
- cookie, lockname.len, lockname.name, lockname.len);
- ret = DLM_IVLOCKID;
- goto leave;
- }
-
- dlmprintk("lockres %*s\n", res->lockname.len, res->lockname.name);
- if (!dlm_is_recovery_lock(past->name, past->namelen))
- down_read(&dlm->recovery_sem);
- spin_lock(&res->spinlock);
-
- /* try convert queue for both ast/bast */
- head = &res->converting;
- lock = NULL;
- list_for_each(iter, head) {
- lock = list_entry (iter, dlm_lock, list);
- if (lock->cookie == cookie)
- goto do_ast;
- }
-
- /* if not on convert, try blocked for ast, granted for bast */
- if (past->type == DLM_AST)
- head = &res->blocked;
- else
- head = &res->granted;
-
- list_for_each(iter, head) {
- lock = list_entry (iter, dlm_lock, list);
- if (lock->cookie == cookie)
- goto do_ast;
- }
-
- dlmprintk("eek! got %sast for unknown lock! cookie=%llu, "
- "name=%*s, namelen=%d\n",
- past->type == DLM_AST ? "" : "b",
- cookie, lockname.len, lockname.name, lockname.len);
- spin_unlock(&res->spinlock);
- if (!dlm_is_recovery_lock(past->name, past->namelen))
- up_read(&dlm->recovery_sem);
- ret = DLM_NORMAL;
- goto leave;
-
-do_ast:
- ret = DLM_NORMAL;
- if (past->type == DLM_AST) {
- list_del(&lock->list);
- list_add_tail(&lock->list, &res->granted);
- dlmprintk("ast: adding to granted list... type=%d, "
- "convert_type=%d\n", lock->type, lock->convert_type);
- if (lock->convert_type != LKM_IVMODE) {
- lock->type = lock->convert_type;
- lock->convert_type = LKM_IVMODE;
- } else {
- // should already be there....
- }
-
- lock->lksb->status = DLM_NORMAL;
-
- /* if we requested the lvb, fetch it into our lksb now */
- if (flags & LKM_GET_LVB) {
- DLM_ASSERT(lock->lksb->flags & DLM_LKSB_GET_LVB);
- memcpy(lock->lksb->lvb, past->lvb, DLM_LVB_LEN);
- }
- status = dlm_do_ast(dlm, res, lock);
- dlmprintk("ast done: now... type=%d, convert_type=%d\n",
- lock->type, lock->convert_type);
- } else {
- dlmprintk("bast: before... type=%d, convert_type=%d\n",
- lock->type, lock->convert_type);
- status = dlm_do_bast(dlm, res, lock, past->blocked_type);
- dlmprintk("bast: after... type=%d, convert_type=%d\n",
- lock->type, lock->convert_type);
- }
-
- if (status < 0)
- dlmprintk("eeek: ast/bast returned %d\n", status);
-
- spin_unlock(&res->spinlock);
- if (!dlm_is_recovery_lock(past->name, past->namelen))
- up_read(&dlm->recovery_sem);
-
-leave:
- return ret;
-}
-
-static int dlm_send_proxy_ast(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int type, int blocked_type)
-{
- int ret = 0;
- dlm_proxy_ast past;
- struct inode *inode = NULL;
- struct iovec iov[2];
- size_t iovlen = 1;
-
- dlmprintk("res %*s, to=%u, type=%d, blocked_type=%d\n",
- res->lockname.len, res->lockname.name, lock->node,
- type, blocked_type);
-
-
- memset(&past, 0, sizeof(dlm_proxy_ast));
- past.node_idx = dlm->group_index;
- past.type = type;
- past.blocked_type = blocked_type;
- past.namelen = res->lockname.len;
- strncpy(past.name, res->lockname.name, past.namelen);
- past.cookie = lock->cookie;
-
- iov[0].iov_len = sizeof(dlm_proxy_ast);
- iov[0].iov_base = &past;
- if (lock->lksb->flags & DLM_LKSB_GET_LVB) {
- past.flags |= LKM_GET_LVB;
- iov[1].iov_len = DLM_LVB_LEN;
- iov[1].iov_base = lock->lksb->lvb;
- iovlen++;
- }
-
- ret = -EINVAL;
- inode = nm_get_group_node_by_index(dlm->group, lock->node);
- if (inode) {
- dlm_proxy_ast_to_net(&past);
- ret = net_send_message_iov(DLM_PROXY_AST_MSG, dlm->key,
- iov, iovlen, inode, NULL);
- iput(inode);
- }
- if (ret < 0)
- dlmprintk("(%d) dlm_send_proxy_ast: returning %d\n",
- current->pid, ret);
- return ret;
-}
-
-
Copied: trunk/fs/ocfs2/cluster/dlmast.c (from rev 1811, trunk/cluster/dlmast.c)
Deleted: trunk/fs/ocfs2/cluster/dlmcommon.h
===================================================================
--- trunk/cluster/dlmcommon.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmcommon.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,52 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmcommon.h
- *
- * Common stuff
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_DLMCOMMON_H
-#define CLUSTER_DLMCOMMON_H
-
-#define DLM_ASSERT(x) ({ if (!(x)) { printk("assert failed! %s:%d\n", __FILE__, __LINE__); BUG(); } })
-
-typedef struct _nm_ctxt nm_ctxt;
-typedef struct _dlm_ctxt dlm_ctxt;
-typedef struct _heartbeat_ctxt heartbeat_ctxt;
-
-#define CLUSTER_DISK_UUID_LEN 32 // 16 byte binary == 32 char hex string
-
-typedef struct _cluster_disk
-{
- // uuid of disk
- char uuid[CLUSTER_DISK_UUID_LEN+1];
- // all the rest are for heartbeat
- dev_t dev;
- u32 blocksize_bits;
- u32 num_blocks;
- u64 start_block;
- util_rarray slots;
-} cluster_disk;
-
-
-#endif /* CLUSTER_DLMCOMMON_H */
Copied: trunk/fs/ocfs2/cluster/dlmcommon.h (from rev 1811, trunk/cluster/dlmcommon.h)
Deleted: trunk/fs/ocfs2/cluster/dlmconvert.c
===================================================================
--- trunk/cluster/dlmconvert.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmconvert.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,457 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmconvert.c
- *
- * underlying calls for lock conversion
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-
-/* NOTE: __dlmconvert_master is the only function in here that
- * needs a spinlock held on entry (res->spinlock) and it is the
- * only one that holds a lock on exit (res->spinlock).
- * All other functions in here need no locks and drop all of
- * the locks that they acquire. */
-static dlm_status __dlmconvert_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type,
- int *call_ast, int *kick_thread);
-static dlm_status dlm_send_remote_convert_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type);
-
-/*
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock
- * held on exit: none
- * returns: see __dlmconvert_master
- */
-dlm_status dlmconvert_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type)
-{
- int call_ast = 0, kick_thread = 0;
- dlm_status status;
-
-#warning i think i need some IN_PROGRESS work here
- spin_lock(&res->spinlock);
- status = __dlmconvert_master(dlm, res, lock, flags, type,
- &call_ast, &kick_thread);
- spin_unlock(&res->spinlock);
-
-#warning fix all ast calling!!!
- if (call_ast)
- if (dlm_do_ast(dlm, res, lock) < 0)
- dlmprintk0("eek\n");
-
- if (kick_thread)
- dlm_kick_thread(dlm, res);
-
- return status;
-}
-
-/* performs lock conversion at the lockres master site
- * locking:
- * caller needs: res->spinlock
- * taken: takes and drops lock->spinlock
- * held on exit: res->spinlock
- * returns: DLM_NORMAL, DLM_NOTQUEUED, DLM_DENIED
- * call_ast: whether ast should be called for this lock
- * kick_thread: whether dlm_kick_thread should be called
- */
-static dlm_status __dlmconvert_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type,
- int *call_ast, int *kick_thread)
-{
- dlm_status status = DLM_NORMAL;
- struct list_head *iter;
- dlm_lock *tmplock=NULL;
-
- dlmprintk("type=%d, convert_type=%d, new convert_type=%d\n", lock->type,
- lock->convert_type, type);
-
- spin_lock(&lock->spinlock);
-
- /* already converting? */
- if (lock->convert_type != LKM_IVMODE) {
- dlmprintk0("attempted to convert a lock with a lock conversion "
- "pending\n");
- status = DLM_DENIED;
- goto unlock_exit;
- }
-
- /* must be on grant queue to convert */
- if (!dlm_lock_on_list(&res->granted, lock)) {
- dlmprintk0("attempted to convert a lock not on grant queue\n");
- status = DLM_DENIED;
- goto unlock_exit;
- }
-
- if (flags & LKM_VALBLK) {
- switch (lock->type) {
- case LKM_EXMODE:
- /* EX + LKM_VALBLK + convert == set lvb */
- dlmprintk("will set lvb: converting %s->%s\n",
- dlm_lock_mode_name(lock->type),
- dlm_lock_mode_name(type));
- lock->lksb->flags |= DLM_LKSB_PUT_LVB;
- break;
- case LKM_PRMODE:
- case LKM_NLMODE:
- /* refetch if new level is not NL */
- if (type > LKM_NLMODE) {
- dlmprintk("will fetch new value into "
- "lvb: converting %s->%s\n",
- dlm_lock_mode_name(lock->type),
- dlm_lock_mode_name(type));
- lock->lksb->flags |= DLM_LKSB_GET_LVB;
- } else {
- dlmprintk("will NOT fetch new value "
- "into lvb: converting "
- "%s->%s\n",
- dlm_lock_mode_name(lock->type),
- dlm_lock_mode_name(type));
- flags &= ~(LKM_VALBLK);
- }
- break;
- }
- }
-
-
- /* in-place downconvert? */
- if (type <= lock->type)
- goto grant;
-
- /* upconvert from here on */
- status = DLM_NORMAL;
- list_for_each(iter, &res->granted) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (tmplock == lock)
- continue;
- if (!dlm_lock_compatible(tmplock->type, type))
- goto switch_queues;
- }
-
- list_for_each(iter, &res->converting) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (!dlm_lock_compatible(tmplock->type, type))
- goto switch_queues;
- /* existing conversion requests take precedence */
- if (!dlm_lock_compatible(tmplock->convert_type, type))
- goto switch_queues;
- }
-
- /* fall thru to grant */
-
-grant:
- dlmprintk("res %*s, granting %s lock\n", res->lockname.len,
- res->lockname.name, dlm_lock_mode_name(type));
- /* immediately grant the new lock type */
- lock->lksb->status = DLM_NORMAL;
- if (lock->node == dlm->group_index)
- dlmprintk0("doing in-place convert for nonlocal lock\n");
- lock->type = type;
- status = DLM_NORMAL;
- *call_ast = 1;
- goto unlock_exit;
-
-switch_queues:
- if (flags & LKM_NOQUEUE) {
- dlmprintk("failed to convert NOQUEUE lock %*s from "
- "%d to %d...\n", res->lockname.len,
- res->lockname.name, lock->type, type);
- status = DLM_NOTQUEUED;
- goto unlock_exit;
- }
- dlmprintk("res %*s, queueing...\n", res->lockname.len,
- res->lockname.name);
-
- lock->convert_type = type;
- list_del(&lock->list);
- list_add_tail(&lock->list, &res->converting);
-
-unlock_exit:
- spin_unlock(&lock->spinlock);
- if (status == DLM_NORMAL)
- *kick_thread = 1;
- return status;
-}
-
-/* messages the master site to do lock conversion
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock, uses DLM_LOCK_RES_IN_PROGRESS
- * held on exit: none
- * returns: DLM_NORMAL, DLM_RECOVERING, status from remote node
- */
-dlm_status dlmconvert_remote(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type)
-{
- dlm_status status;
-
- dlmprintk("type=%d, convert_type=%d, busy=%d\n", lock->type,
- lock->convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
-
- spin_lock(&res->spinlock);
- if (res->state & DLM_LOCK_RES_RECOVERING) {
- status = DLM_RECOVERING;
- goto bail;
- }
- /* will exit this call with spinlock held */
- __dlm_wait_on_lockres(res);
-
- res->state |= DLM_LOCK_RES_IN_PROGRESS;
-
- /* move lock to local convert queue */
- list_del(&lock->list);
- list_add_tail(&lock->list, &res->converting);
- if (lock->convert_type != LKM_IVMODE) {
- dlmprintk0("error! converting a remote lock that is already "
- "converting!\n");
- /* TODO: return correct error */
- BUG();
- }
- lock->convert_type = type;
-
- if (flags & LKM_VALBLK) {
- if (lock->type == LKM_EXMODE) {
- flags |= LKM_PUT_LVB;
- lock->lksb->flags |= DLM_LKSB_PUT_LVB;
- } else {
- if (lock->convert_type == LKM_NLMODE) {
- dlmprintk0("erm, no point in specifying "
- "LKM_VALBLK if converting to NL\n");
- flags &= ~LKM_VALBLK;
- } else {
- flags |= LKM_GET_LVB;
- lock->lksb->flags |= DLM_LKSB_GET_LVB;
- }
- }
- }
- spin_unlock(&res->spinlock);
-
- /* no locks held here.
- * need to wait for a reply as to whether it got queued or not. */
- status = dlm_send_remote_convert_request(dlm, res, lock, flags, type);
-
- spin_lock(&res->spinlock);
- res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-
- /* if it failed, move it back to granted queue */
- if (status != DLM_NORMAL) {
- list_del(&lock->list);
- list_add_tail(&lock->list, &res->granted);
- lock->convert_type = LKM_IVMODE;
- lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
- }
-bail:
- spin_unlock(&res->spinlock);
-
- /* TODO: should this be a wake_one? */
- /* wake up any IN_PROGRESS waiters */
- wake_up(&res->wq);
-
- return status;
-}
-
-/* sends DLM_CONVERT_LOCK_MSG to master site
- * locking:
- * caller needs: none
- * taken: none
- * held on exit: none
- * returns: DLM_NOLOCKMGR, status from remote node
- */
-static dlm_status dlm_send_remote_convert_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type)
-{
- struct inode *inode = NULL;
- dlm_convert_lock convert;
- int tmpret;
- dlm_status ret;
- int status = 0;
- struct iovec iov[2];
- size_t iovlen = 1;
-
- dlmprintk0("\n");
-
- memset(&convert, 0, sizeof(dlm_convert_lock));
- convert.node_idx = dlm->group_index;
- convert.requested_type = type;
- convert.cookie = lock->cookie;
- convert.namelen = res->lockname.len;
- convert.flags = flags;
- strncpy(convert.name, res->lockname.name, convert.namelen);
-
- iov[0].iov_len = sizeof(dlm_convert_lock);
- iov[0].iov_base = &convert;
-
- if (flags & LKM_PUT_LVB) {
- /* extra data to send if we are updating lvb */
- iov[1].iov_len = DLM_LVB_LEN;
- iov[1].iov_base = lock->lksb->lvb;
- iovlen++;
- }
-
- ret = DLM_NOLOCKMGR;
- inode = nm_get_group_node_by_index(dlm->group, res->owner);
- if (inode) {
- dlm_convert_lock_to_net(&convert);
- tmpret = net_send_message_iov(DLM_CONVERT_LOCK_MSG, dlm->key,
- iov, iovlen, inode, &status);
- if (tmpret >= 0) {
- // successfully sent and received
- ret = status; // this is already a dlm_status
- } else {
- dlmprintk("error occurred in net_send_message: %d\n",
- tmpret);
- ret = dlm_err_to_dlm_status(tmpret);
- }
- iput(inode);
- }
-
- return ret;
-}
-
-
-/* handler for DLM_CONVERT_LOCK_MSG on master site
- * locking:
- * caller needs: none
- * taken: takes and drop res->spinlock
- * held on exit: none
- * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS,
- * status from __dlmconvert_master
- */
-int dlm_convert_lock_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_convert_lock *cnv = (dlm_convert_lock *)msg->buf;
- dlm_lock_resource *res = NULL;
- struct list_head *iter;
- dlm_lock *lock = NULL;
- dlm_lockstatus *lksb;
- dlm_status status = DLM_NORMAL;
- struct qstr lockname;
- u32 flags;
- int call_ast = 0, kick_thread = 0;
- int found = 0;
-
- dlm_convert_lock_to_host(cnv);
- lockname.name = cnv->name;
- lockname.len = cnv->namelen;
- flags = cnv->flags;
-
- if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
- (LKM_PUT_LVB|LKM_GET_LVB)) {
- dlmprintk("both PUT and GET lvb specified\n");
- status = DLM_BADARGS;
- goto leave;
- }
-
- dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
- (flags & LKM_GET_LVB ? "get lvb" : "none"));
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- status = DLM_IVLOCKID;
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res)
- goto leave;
-
- spin_lock(&res->spinlock);
- list_for_each(iter, &res->granted) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock->cookie == cnv->cookie &&
- lock->node == cnv->node_idx) {
- found = 1;
- break;
- }
- }
- spin_unlock(&res->spinlock);
- if (!found)
- goto leave;
-
- /* found the lock */
- lksb = lock->lksb;
-
- /* see if caller needed to get/put lvb */
- if (flags & LKM_PUT_LVB) {
- DLM_ASSERT(!(lksb->flags &
- (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)));
- lksb->flags |= DLM_LKSB_PUT_LVB;
- memcpy(&lksb->lvb[0], &cnv->lvb[0], DLM_LVB_LEN);
- } else if (flags & LKM_GET_LVB) {
- DLM_ASSERT(!(lksb->flags &
- (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)));
- lksb->flags |= DLM_LKSB_GET_LVB;
- }
-
-#warning i think we need some handling of IN_PROGRESS here!
- status = __dlmconvert_master(dlm, res, lock, flags, cnv->requested_type,
- &call_ast, &kick_thread);
-
- if (status != DLM_NORMAL)
- lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
-
-leave:
- if (!lock)
- dlmprintk("did not find lock to convert on "
- "grant queue! cookie=%llu\n", cnv->cookie);
-
-#warning fix all ast calling!!!
- if (call_ast)
- if (dlm_do_ast(dlm, res, lock) < 0)
- dlmprintk0("eek\n");
- if (kick_thread)
- dlm_kick_thread(dlm, res);
-
- return status;
-}
Copied: trunk/fs/ocfs2/cluster/dlmconvert.c (from rev 1811, trunk/cluster/dlmconvert.c)
Deleted: trunk/fs/ocfs2/cluster/dlmlock.c
===================================================================
--- trunk/cluster/dlmlock.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmlock.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,334 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmlock.c
- *
- * underlying calls for lock creation
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-static dlm_status dlm_send_remote_lock_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock, int flags);
-
-/* performs lock creation at the lockres master site
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock
- * held on exit: none
- * returns: DLM_NORMAL, DLM_NOTQUEUED
- */
-dlm_status dlmlock_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags)
-{
- struct list_head *iter;
- dlm_lock *tmplock;
- int call_ast = 0;
- dlm_status status = DLM_NORMAL;
-
- DLM_ASSERT(lock);
- DLM_ASSERT(res);
- DLM_ASSERT(dlm);
- DLM_ASSERT(lock->lksb);
-
- dlmprintk("type=%d\n", lock->type);
-
- /* this will effectively spin_lock(&res->spinlock) */
- dlm_wait_on_lockres(res);
- res->state |= DLM_LOCK_RES_IN_PROGRESS;
-
- /* for NOQUEUE request, unless we get
- * lock right away, return DLM_NOTQUEUED */
- if (flags & LKM_NOQUEUE)
- status = DLM_NOTQUEUED;
-
- list_for_each(iter, &res->granted) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (!dlm_lock_compatible(tmplock->type, lock->type)) {
- list_add_tail(&lock->list, &res->blocked);
- goto done;
- }
- }
-
- list_for_each(iter, &res->converting) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (!dlm_lock_compatible(tmplock->type, lock->type)) {
- list_add_tail(&lock->list, &res->blocked);
- goto done;
- }
- }
-
- /* got it right away */
- lock->lksb->status = DLM_NORMAL;
- status = DLM_NORMAL;
- list_add_tail(&lock->list, &res->granted);
- call_ast = 1;
-
-done:
- res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- spin_unlock(&res->spinlock);
- wake_up(&res->wq);
-
- dlm_kick_thread(dlm, res);
-
- if (call_ast) {
-#warning fix all ast calling!!!
- if (dlm_do_ast(dlm, res, lock) < 0)
- dlmprintk0("eek\n");
- }
-
- return status;
-}
-
-/*
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock
- * held on exit: none
- * returns: DLM_DENIED, DLM_RECOVERING, or net status
- */
-dlm_status dlmlock_remote(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags)
-{
- dlm_status status = DLM_DENIED;
-
- dlmprintk("type=%d\n", lock->type);
-
- spin_lock(&res->spinlock);
- if (res->state & DLM_LOCK_RES_RECOVERING) {
- status = DLM_RECOVERING;
- goto bail;
- }
-
- /* will exit this call with spinlock held */
- __dlm_wait_on_lockres(res);
- res->state |= DLM_LOCK_RES_IN_PROGRESS;
- /* add lock to local (secondary) queue */
- list_add_tail(&lock->list, &res->blocked);
- spin_unlock(&res->spinlock);
-
- /* spec seems to say that you will get DLM_NORMAL when the lock
- * has been queued, meaning we need to wait for a reply here. */
- status = dlm_send_remote_lock_request(dlm, res, lock, flags);
-
- spin_lock(&res->spinlock);
- res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- if (status != DLM_NORMAL) {
- /* remove from local queue if it failed */
- list_del(&lock->list);
- }
-bail:
- spin_unlock(&res->spinlock);
- wake_up(&res->wq);
- return status;
-}
-
-
-/* for remote lock creation.
- * locking:
- * caller needs: none, but need res->state & DLM_LOCK_RES_IN_PROGRESS
- * taken: none
- * held on exit: none
- * returns: DLM_NOLOCKMGR, or net status
- */
-static dlm_status dlm_send_remote_lock_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock, int flags)
-{
- struct inode *inode = NULL;
- dlm_create_lock create;
- int tmpret, status = 0;
- dlm_status ret;
-
- dlmprintk0("\n");
-
- memset(&create, 0, sizeof(create));
- create.node_idx = dlm->group_index;
- create.requested_type = lock->type;
- create.cookie = lock->cookie;
- create.namelen = res->lockname.len;
- create.flags = flags;
- strncpy(create.name, res->lockname.name, create.namelen);
-
- ret = DLM_NOLOCKMGR;
- inode = nm_get_group_node_by_index(dlm->group, res->owner);
- if (inode) {
- dlm_create_lock_to_net(&create);
- tmpret = net_send_message(DLM_CREATE_LOCK_MSG, dlm->key,
- &create, sizeof(create),
- inode, &status);
- if (tmpret >= 0) {
- // successfully sent and received
- ret = status; // this is already a dlm_status
- } else {
- dlmprintk("error occurred in net_send_message: %d\n",
- tmpret);
- ret = dlm_err_to_dlm_status(tmpret);
- }
- iput(inode);
- }
-
- return ret;
-}
-
-/* handler for lock creation net message
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock
- * held on exit: none
- * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED
- */
-int dlm_create_lock_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_create_lock *create = (dlm_create_lock *)msg->buf;
- dlm_lock_resource *res;
- dlm_lock *newlock = NULL, *tmplock;
- dlm_lockstatus *lksb = NULL;
- dlm_status status = DLM_NORMAL;
- struct qstr lockname;
- struct list_head *iter;
- int call_ast = 0;
-
- DLM_ASSERT(dlm);
-
- dlm_create_lock_to_host(create);
- lockname.name = create->name;
- lockname.len = create->namelen;
-
- dlmprintk0("\n");
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- status = DLM_SYSERR;
- newlock = kmalloc(sizeof(dlm_lock), GFP_KERNEL);
- if (!newlock)
- goto leave;
-
- lksb = kmalloc(sizeof(dlm_lockstatus), GFP_KERNEL);
- if (!lksb)
- goto leave;
-
- memset(newlock, 0, sizeof(dlm_lock));
- INIT_LIST_HEAD(&newlock->list);
- INIT_LIST_HEAD(&newlock->ast_list);
- spin_lock_init(&newlock->spinlock);
- newlock->type = create->requested_type;
- newlock->convert_type = LKM_IVMODE;
- newlock->highest_blocked = LKM_IVMODE;
- newlock->node = create->node_idx;
- newlock->ast = NULL;
- newlock->bast = NULL;
- newlock->astdata = NULL;
- newlock->cookie = create->cookie;
-
- memset(lksb, 0, sizeof(dlm_lockstatus));
- newlock->lksb = lksb;
- lksb->lockid = newlock;
- lksb->flags |= DLM_LKSB_KERNEL_ALLOCATED;
-
- status = DLM_IVLOCKID;
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res)
- goto leave;
-
- /* found lock resource */
- status = DLM_NORMAL;
- spin_lock(&res->spinlock);
- newlock->lockres = res;
-
- /* for NOQUEUE request, unless we get
- * lock right away, return DLM_NOTQUEUED */
- if (create->flags & LKM_NOQUEUE)
- status = DLM_NOTQUEUED;
-
- /* see if any granted locks are blocking us */
- list_for_each(iter, &res->granted) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (!dlm_lock_compatible(tmplock->type, newlock->type)) {
- list_add_tail(&newlock->list, &res->blocked);
- goto blocked;
- }
- }
- list_for_each(iter, &res->converting) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (!dlm_lock_compatible(tmplock->type, newlock->type)){
- list_add_tail(&newlock->list, &res->blocked);
- goto blocked;
- }
- }
-
- /* got it right away */
- newlock->lksb->status = DLM_NORMAL;
- status = DLM_NORMAL;
- list_add_tail(&newlock->list, &res->granted);
- call_ast = 1;
-
-blocked:
- spin_unlock(&res->spinlock);
-
-#warning fix all ast calling!!!
- if (call_ast)
- if (dlm_do_ast(dlm, res, newlock) < 0)
- dlmprintk0("eek\n");
-
- dlm_kick_thread(dlm, res);
-
-leave:
- if (status != DLM_NORMAL) {
- if (newlock)
- kfree(newlock);
- if (lksb)
- kfree(lksb);
- }
-
- return status;
-}
Copied: trunk/fs/ocfs2/cluster/dlmlock.c (from rev 1811, trunk/cluster/dlmlock.c)
Deleted: trunk/fs/ocfs2/cluster/dlmmaster.c
===================================================================
--- trunk/cluster/dlmmaster.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmmaster.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,1071 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmmod.c
- *
- * standalone DLM module
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-spinlock_t dlm_master_lock = SPIN_LOCK_UNLOCKED;
-LIST_HEAD(dlm_master_list);
-
-/* gives a really vague idea of the system load */
-atomic_t dlm_num_resources = ATOMIC_INIT(0);
-
-
-static int dlm_init_mle(dlm_master_list_entry *mle, int type, dlm_ctxt *dlm,
- dlm_lock_resource *res, struct qstr *name, int locked);
-static void dlm_put_mle(dlm_master_list_entry *mle);
-static int dlm_do_master_request_resp(dlm_ctxt *dlm, struct qstr *name,
- int response, int to);
-static int dlm_do_master_request(dlm_master_list_entry *mle, int to);
-static int dlm_do_assert_master(dlm_master_list_entry *mle);
-static void dlm_mle_node_up(struct inode *group, struct inode *node,
- int idx, void *data);
-static void dlm_mle_node_down(struct inode *group, struct inode *node,
- int idx, void *data);
-
-
-
-
-/* remove from list and free */
-static void dlm_put_mle(dlm_master_list_entry *mle)
-{
- if (atomic_dec_and_lock(&mle->refcnt, &dlm_master_lock)) {
- list_del(&mle->list);
- spin_unlock(&dlm_master_lock);
- hb_unregister_callback(HB_NODE_DOWN_CB, dlm_mle_node_down, mle);
- hb_unregister_callback(HB_NODE_UP_CB, dlm_mle_node_up, mle);
- kfree(mle);
- }
-}
-
-static inline void dlm_get_mle(dlm_master_list_entry *mle)
-{
- atomic_inc(&mle->refcnt);
-}
-
-
-
-static int dlm_init_mle(dlm_master_list_entry *mle, int type, dlm_ctxt *dlm,
- dlm_lock_resource *res, struct qstr *name, int locked)
-{
- int ret = 0;
-
- mle->dlm = dlm;
- mle->type = type;
- INIT_LIST_HEAD(&mle->list);
- memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
- spin_lock_init(&mle->spinlock);
- init_waitqueue_head(&mle->wq);
- atomic_set(&mle->woken, 0);
- atomic_set(&mle->refcnt, 1);
- memset(mle->response_map, 0, sizeof(mle->response_map));
- mle->master = NM_MAX_NODES;
- mle->error = 0;
-
- if (mle->type == DLM_MLE_MASTER)
- mle->u.res = res;
- else
- strncpy(mle->u.name.name, name->name, name->len);
-
- if (!locked)
- spin_lock(&dlm->spinlock);
-
- /* copy off the node_map and register hb callbacks on our copy */
- memcpy(mle->node_map, dlm->node_map, sizeof(mle->node_map));
- memcpy(mle->vote_map, dlm->node_map, sizeof(mle->vote_map));
- clear_bit(dlm->group_index, mle->vote_map);
- clear_bit(dlm->group_index, mle->node_map);
-
-#warning cannot do this here cuz this kmallocs and we are under a spinlock
- if (hb_register_callback(HB_NODE_DOWN_CB, dlm_mle_node_down, mle,
- DLM_HB_NODE_DOWN_PRI+1)
- ||
- hb_register_callback(HB_NODE_UP_CB, dlm_mle_node_up, mle,
- DLM_HB_NODE_UP_PRI+1)) {
- ret = -EINVAL;
- }
-
- if (!locked)
- spin_unlock(&dlm->spinlock);
-
- return ret;
-}
-
-
-
-
-/////////////////////////////////////////////////
-//
-// TODO: change these comments to reflect reality
-//
-// master_request(target=me)
-// wait for all responses
-// if maybe_map is 0 there are no others in progress
-// assert_master(me)
-// else (maybe_map has some nodes in it)
-// (nodes in maybe_map had better be < my node num)
-// wait for assert_master
-// endif
-//
-//
-// receive:
-// master_request(target):
-// if i own it, return YES
-// if i dont know anything about it, return NO
-// if i have it in progress
-// if my node number is lower
-// return MAYBE
-// else
-// if target < lowest_so_far, lowest_so_far=target
-// return NO
-//
-// assert_master(master):
-// if i own it, BUG()!!!
-// if i have it, but owner!=master, BUG()!!!
-// if i dont know anything about it, ignore
-// if i have it in progress
-// if lowest_so_far != master
-// BUG()!!!
-// else
-// set the owner, DONE
-//
-/////////////////////////////////////////////////
-
-
-
-
-/*
- * lookup a lock resource by name.
- * may already exist in the hashtable.
- *
- * if not, allocate enough for the lockres and for
- * the temporary structure used in doing the mastering.
- *
- * also, do a lookup in the dlm_master_list to see
- * if another node has begun mastering the same lock.
- * if so, there should be a block entry in there
- * for this name, and we should *not* attempt to master
- * the lock here. need to wait around for that node
- * to assert_master (or die).
- *
- */
-dlm_lock_resource * dlm_get_lock_resource(dlm_ctxt *dlm,
- struct qstr *lockname, int flags)
-{
- dlm_lock_resource *tmpres=NULL, *res=NULL;
- struct list_head *bucket;
- dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
- struct list_head *iter;
- int blocked = 0;
- int map_changed = 0, restart = 0, assert = 0;
- int ret, start, bit;
-
- bucket = &(dlm->resources[lockname->hash & DLM_HASH_MASK]);
-
- /* pre-allocate a dlm_lock_resource and master stuff */
- mle = kmalloc(sizeof(dlm_master_list_entry), GFP_KERNEL);
- res = kmalloc(sizeof(dlm_lock_resource), GFP_KERNEL);
- if (!mle || !res) {
- dlmprintk0("could not allocate memory for new lock resource\n");
- if (mle)
- kfree(mle);
- if (res)
- kfree(res);
- res = NULL;
- goto leave;
- }
-
- /* check for pre-existing lock */
- spin_lock(&dlm->spinlock);
- tmpres = __dlm_lookup_lock(dlm, lockname);
- if (tmpres) {
- spin_unlock(&dlm->spinlock);
- /* TODO: return error, or return the lockres ?!? */
- kfree(res);
- kfree(mle);
- res = tmpres;
- goto leave;
- }
-
- dlm_init_lockres(res, lockname);
-
- if (flags & LKM_LOCAL) {
- /* caller knows it's safe to assume it's not mastered elsewhere
- * DONE! return right away */
- list_add_tail(&res->list, bucket);
- res->owner = dlm->group_index;
- atomic_inc(&dlm_num_resources);
- spin_unlock(&dlm->spinlock);
- /* lockres still marked IN_PROGRESS */
- goto wake_waiters;
- }
-
- /* check master list to see if another node has started mastering it */
- spin_lock(&dlm_master_lock);
- list_for_each(iter, &dlm_master_list) {
- tmpmle = list_entry(iter, dlm_master_list_entry, list);
- if (!dlm_mle_equal(dlm, tmpmle, lockname))
- continue;
-
- if (tmpmle->type == DLM_MLE_MASTER) {
- dlmprintk0("eek! master entry for nonexistent lock!\n");
- BUG();
- }
- dlm_get_mle(tmpmle);
- blocked = 1;
- // found a block, wait for lock to be mastered by another node
- break;
- }
-
- if (!blocked) {
- /* go ahead and try to master lock on this node */
- if (dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 1)) {
- dlmprintk0("bug! failed to register hb callbacks\n");
- BUG();
- }
- list_add(&mle->list, &dlm_master_list);
- }
- spin_unlock(&dlm_master_lock);
-
- /* at this point there is either a DLM_MLE_BLOCK or a DLM_MLE_MASTER
- * on the master list, so it's safe to add the lockres to the hashtable.
- * anyone who finds the lock will still have to wait on the IN_PROGRESS.
- * also, any new nodes that try to join at this point will have to wait
- * until my dlm_master_lock list is empty, so they cannot possibly
- * do any master requests yet... TODO
- * ?? should i have a special type of mle just for joining nodes ??
- * ?? could allow them to come in and put their mle
- * on the list and sleep ?? */
-
- /* finally add the lockres to its hash bucket */
- list_add_tail(&res->list, bucket);
- atomic_inc(&dlm_num_resources);
- spin_unlock(&dlm->spinlock);
-
- if (blocked) {
- /* must wait for lock to be mastered elsewhere */
- kfree(mle);
- mle = tmpmle;
- goto wait;
- }
-
- ret = -EINVAL;
- start = 0;
- while (1) {
- bit = find_next_bit (mle->vote_map, NM_MAX_NODES, start);
- if (bit >= NM_MAX_NODES) {
- dlmprintk0("no more nodes\n");
- break;
- }
-
- ret = dlm_do_master_request(mle, bit);
- if (ret < 0) {
- // TODO
- //dlmprintk("dlm_do_master_request returned %d\n", ret);
- }
- if (mle->master != NM_MAX_NODES) {
- // found a master!
- break;
- }
- start = bit+1;
- }
-
-wait:
- while (1) {
- spin_lock(&res->spinlock);
- if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
- // another node has become the owner
- spin_unlock(&res->spinlock);
- break;
- }
- spin_unlock(&res->spinlock);
-
- spin_lock(&mle->spinlock);
- if (mle->master != NM_MAX_NODES) {
- u16 m = mle->master;
- // dlmprintk("node %u is the master!\n", m);
- spin_unlock(&mle->spinlock);
-
- spin_lock(&res->spinlock);
- res->owner = m;
- spin_unlock(&res->spinlock);
- break;
- }
- restart = 0;
- map_changed = (memcmp(mle->vote_map, mle->node_map,
- sizeof(mle->vote_map)) != 0);
- if (memcmp(mle->vote_map, mle->response_map,
- sizeof(mle->vote_map)) == 0) {
- // dlmprintk("every node has responded...\n");
- if (map_changed) {
- dlmprintk0("eek! got all original nodes, but "
- "nodemap changed while collecting "
- "responses\n");
- restart = 1;
- }
-
- if (mle->error) {
- dlmprintk0("ugh. some node hit an error "
- "(-ENOMEM). try the whole thing "
- "again\n");
- mle->error = 0;
- /* TODO: treat this just like the dead node
- * case below, cleanup and start over, but
- * keep the error node around */
- restart = 1;
- }
-
- bit = find_next_bit(mle->maybe_map, NM_MAX_NODES, 0);
- if (bit >= NM_MAX_NODES) {
- /* No other nodes are in-progress. Those nodes
- * should all be locking out this lockid until
- * I assert. They should have put a dummy entry
- * on dlm_master_list. Need to assert myself as
- * the master. */
- // dlmprintk0("I am the only node in-progress!"
- // " asserting myself as master\n");
- assert = 1;
- } else {
- /* other nodes are in-progress */
- if (map_changed &&
- !test_bit(bit, mle->node_map)) {
- /* TODO: need to copy the node_map into
- * the vote_map, zero everything out
- * and start over */
- dlmprintk("need to handle this case. "
- "winning node %u just died\n",
- bit);
- restart = 1;
- }
-
- if (bit > dlm->group_index) {
- // dlmprintk("next in-progress node "
- // "(%u) is higher than me (%u)\n",
- // bit, dlm->group_index);
-
- /* Nodes not in-progress should be
- * locking out this lockid until I
- * assert. In-progress nodes should
- * match me up with their lowest
- * maybe_map bit. Need to assert myself
- * as the master */
- // dlmprintk("I am the lowest node! "
- // "asserting myself as master\n");
- assert = 1;
- } else {
- /* Need to sit around and wait for
- * assert. My lowest maybe_map bit
- * should be the one to assert. Just
- * fall through and sleep. Should be
- * woken by the handler. */
- // dlmprintk("sleeping while waiting "
- // "for %u to assert himself as "
- // "master\n", bit);
- }
- }
- } else {
- if (map_changed) {
- /* TODO: need to handle this */
- dlmprintk0("eek! nodemap changed while "
- "collecting responses\n");
- restart = 1;
- }
- // dlmprintk0("still waiting for all nodes to "
- // "respond...\n");
- }
-
- if (restart && assert)
- assert = 0;
-
- /* make sure to tell any other nodes that i am mastering this */
- if (assert)
- mle->master = dlm->group_index;
-
- spin_unlock(&mle->spinlock);
-
- if (assert) {
- ret = dlm_do_assert_master(mle);
- // dlmprintk("assert returned %d!\n", ret);
- if (ret == 0) {
- spin_lock(&res->spinlock);
- res->owner = dlm->group_index;
- spin_unlock(&res->spinlock);
- // dlmprintk("wooo! i am the owner. phew!\n");
- break;
- } else
- restart = 1;
- }
- if (restart) {
- dlmprintk0("something happened such that the master "
- "process needs to be restarted!\n");
- /* TODO: clear it all out and start over */
- }
-
- atomic_set(&mle->woken, 0);
- ret = util_wait_atomic_eq(&mle->wq, &mle->woken, 1, 5000);
- if (ret == -EINTR) {
- dlmprintk0("interrupted during lock mastery!\n");
- break;
- }
- if (ret == -ETIMEDOUT) {
- dlmprintk("timed out during lock mastery: "
- "vote_map=%0lx, response_map=%0lx\n",
- mle->vote_map[0], mle->response_map[0]);
- continue;
- }
- }
- dlm_put_mle(mle);
-
-wake_waiters:
- spin_lock(&res->spinlock);
- res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- spin_unlock(&res->spinlock);
- wake_up(&res->wq);
-
-leave:
- return res;
-}
-
-
-
-
-/*
- * locks that can be taken here:
- * dlm->spinlock
- * res->spinlock
- * mle->spinlock
- * dlm_master_list
- *
- * if possible, TRIM THIS DOWN!!!
- */
-int dlm_master_request_handler(net_msg *msg, u32 len, void *data)
-{
- u8 response = DLM_MASTER_RESP_MAYBE;
- dlm_ctxt *dlm = data;
- dlm_lock_resource *res;
- dlm_master_request *request = (dlm_master_request *) msg->buf;
- dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
- struct qstr lockname;
- int found;
- struct list_head *iter;
-
- dlm_master_request_to_host(request);
- lockname.name = request->name;
- lockname.len = request->namelen;
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
-way_up_top:
- spin_lock(&dlm->spinlock);
- res = __dlm_lookup_lock(dlm, &lockname);
- if (res) {
- spin_unlock(&dlm->spinlock);
-
- /* take care of the easy cases up front */
- spin_lock(&res->spinlock);
- if (res->owner == dlm->group_index) {
- spin_unlock(&res->spinlock);
- // dlmprintk0("this node is the master\n");
- response = DLM_MASTER_RESP_YES;
- if (mle)
- kfree(mle);
- goto send_response;
- } else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
- spin_unlock(&res->spinlock);
- // dlmprintk("node %u is the master\n", res->owner);
- response = DLM_MASTER_RESP_NO;
- if (mle)
- kfree(mle);
- goto send_response;
- }
-
- /* ok, there is no owner. either this node is
- * being blocked, or it is actively trying to
- * master this lock. */
- if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
- dlmprintk0("bug! lock with no owner should be "
- "in-progress!\n");
- BUG();
- }
-
- // dlmprintk0("lockres is in progress...\n");
- found = 0;
- spin_lock(&dlm_master_lock);
- list_for_each(iter, &dlm_master_list) {
- tmpmle = list_entry(iter, dlm_master_list_entry, list);
- if (!dlm_mle_equal(dlm, tmpmle, &lockname))
- continue;
-
- dlm_get_mle(tmpmle);
- spin_lock(&tmpmle->spinlock);
- if (tmpmle->type == DLM_MLE_BLOCK) {
- // dlmprintk0("this node is waiting for "
- // "lockres to be mastered\n");
- response = DLM_MASTER_RESP_NO;
- } else {
- // dlmprintk0("this node is attempting to "
- // "master lockres\n");
- response = DLM_MASTER_RESP_MAYBE;
- }
- set_bit(request->node_idx, tmpmle->maybe_map);
- spin_unlock(&tmpmle->spinlock);
-
- spin_unlock(&dlm_master_lock);
- spin_unlock(&res->spinlock);
-
- dlm_put_mle(tmpmle);
- if (mle)
- kfree(mle);
- goto send_response;
- }
- spin_unlock(&dlm_master_lock);
- spin_unlock(&res->spinlock);
- dlmprintk0("bug bug bug!!! no mle found for this lock!\n");
- BUG();
- }
-
- /*
- * lockres doesn't exist on this node
- * if there is an MLE_BLOCK, return NO
- * if there is an MLE_MASTER, return MAYBE
- * otherwise, add an MLE_BLOCK, return NO
- */
- found = 0;
- spin_lock(&dlm_master_lock);
- list_for_each(iter, &dlm_master_list) {
- tmpmle = list_entry(iter, dlm_master_list_entry, list);
- if (!dlm_mle_equal(dlm, tmpmle, &lockname))
- continue;
- dlm_get_mle(tmpmle);
- found = 1;
- break;
- }
-
- if (!found) {
- /* this lockid has never been seen on this node yet */
- // dlmprintk0("no mle found\n");
- if (!mle) {
- spin_unlock(&dlm_master_lock);
- spin_unlock(&dlm->spinlock);
-
- mle = kmalloc(sizeof(dlm_master_list_entry) +
- lockname.len, GFP_KERNEL);
- if (!mle) {
- // bad bad bad... this sucks.
- response = DLM_MASTER_RESP_ERROR;
- goto send_response;
- }
- if (dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL,
- &lockname, 0)) {
- dlmprintk0("eeek!\n");
- response = DLM_MASTER_RESP_ERROR;
- dlm_put_mle(mle);
- goto send_response;
- }
- goto way_up_top;
- }
-
- // dlmprintk0("this is second time thru, already allocated, "
- // "add the block.\n");
- set_bit(request->node_idx, mle->maybe_map);
- list_add(&mle->list, &dlm_master_list);
- response = DLM_MASTER_RESP_NO;
- } else {
- // dlmprintk0("mle was found\n");
- spin_lock(&tmpmle->spinlock);
- if (tmpmle->type == DLM_MLE_BLOCK)
- response = DLM_MASTER_RESP_NO;
- else
- response = DLM_MASTER_RESP_MAYBE;
- set_bit(request->node_idx, tmpmle->maybe_map);
- spin_unlock(&tmpmle->spinlock);
- dlm_put_mle(tmpmle);
- }
- spin_unlock(&dlm_master_lock);
- spin_unlock(&dlm->spinlock);
-
-send_response:
- //ret = dlm_do_master_request_resp(dlm, &lockname, response,
- // request->node_idx);
- //dlmprintk("response returned %d\n", ret);
- //dlmprintk("sending response %d to other node\n", response);
- return response;
-}
-
-/* NOTE: when doing node recovery, run the dlm_master_list looking for the
- * dead node in any maybe_map... clear that bit, and if now empty, clear the
- * whole thing */
-
-/*
- * locks that can be taken here:
- * mle->spinlock
- * dlm_master_list
- *
- */
-int dlm_master_request_resp_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_master_list_entry *mle = NULL;
- dlm_master_request_resp *resp = (dlm_master_request_resp *) msg->buf;
- int found = 0, wake = 0;
- struct list_head *iter;
- struct qstr lockname;
-
- dlm_master_request_resp_to_host(resp);
- lockname.name = resp->name;
- lockname.len = resp->namelen;
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- spin_lock(&dlm_master_lock);
- list_for_each(iter, &dlm_master_list) {
- mle = list_entry(iter, dlm_master_list_entry, list);
- if (!dlm_mle_equal(dlm, mle, &lockname)) {
- mle = NULL;
- continue;
- }
-
- dlm_get_mle(mle);
- if (mle->type == DLM_MLE_BLOCK) {
- dlmprintk0("eek! cannot get a response for a block!\n");
- break;
- }
- found = 1;
- wake = 0;
- spin_lock(&mle->spinlock);
- switch (resp->response) {
- case DLM_MASTER_RESP_YES:
- set_bit(resp->node_idx, mle->response_map);
- // dlmprintk("woot! node %u is the master!\n",
- // resp->node_idx);
- mle->master = resp->node_idx;
- wake = 1;
- break;
- case DLM_MASTER_RESP_NO:
- // dlmprintk("node %u is not the master, not "
- // "in-progress\n", resp->node_idx);
- set_bit(resp->node_idx, mle->response_map);
- if (memcmp(mle->response_map, mle->vote_map,
- sizeof(mle->vote_map))==0)
- wake = 1;
- break;
- case DLM_MASTER_RESP_MAYBE:
- // dlmprintk("node %u is not the master, but IS"
- // " in-progress\n", resp->node_idx);
- set_bit(resp->node_idx, mle->response_map);
- set_bit(resp->node_idx, mle->maybe_map);
- if (memcmp(mle->response_map, mle->vote_map,
- sizeof(mle->vote_map))==0)
- wake = 1;
- break;
- case DLM_MASTER_RESP_ERROR:
- dlmprintk("node %u hit an -ENOMEM! try this "
- "whole thing again\n",
- resp->node_idx);
- mle->error = 1;
- wake = 1;
- break;
- default:
- dlmprintk("bad response! %u\n", resp->response);
- break;
- }
- if (wake) {
- atomic_set(&mle->woken, 1);
- wake_up(&mle->wq);
- }
- spin_unlock(&mle->spinlock);
- break;
- }
- spin_unlock(&dlm_master_lock);
-
- if (found)
- dlm_put_mle(mle);
- else
- dlmprintk0("hrrm... got a master resp but found no matching "
- "request\n");
- return 0;
-}
-
-/*
- * locks that can be taken here:
- * dlm->spinlock
- * res->spinlock
- * mle->spinlock
- * dlm_master_list
- *
- * if possible, TRIM THIS DOWN!!!
- */
-int dlm_assert_master_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_master_list_entry *mle = NULL;
- dlm_assert_master *assert = (dlm_assert_master *)msg->buf;
- dlm_lock_resource *res;
- int bit;
- struct list_head *iter;
- struct qstr lockname;
-
- dlm_assert_master_to_host(assert);
- lockname.name = assert->name;
- lockname.len = assert->namelen;
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- spin_lock(&dlm->spinlock);
-
- /* find the MLE */
- spin_lock(&dlm_master_lock);
- list_for_each(iter, &dlm_master_list) {
- mle = list_entry(iter, dlm_master_list_entry, list);
- if (dlm_mle_equal(dlm, mle, &lockname)) {
- dlm_get_mle(mle);
- break;
- }
- mle = NULL;
- }
- if (!mle) {
- dlmprintk("EEEEEEK! just got an assert_master from %u, but no "
- "MLE for it!\n",
- assert->node_idx);
- spin_unlock(&dlm_master_lock);
- goto check_lockres;
- }
- bit = find_next_bit (mle->maybe_map, NM_MAX_NODES, 0);
- if (bit >= NM_MAX_NODES) {
- dlmprintk("EEK! no bits set in the maybe_map, but %u is "
- "asserting!\n", assert->node_idx);
- BUG();
- } else if (bit != assert->node_idx) {
- /* TODO: is this ok? */
- dlmprintk("EEK! expected %u to be the master, but %u is "
- "asserting!\n", bit, assert->node_idx);
- BUG();
- }
- spin_unlock(&dlm_master_lock);
-
- /* ok everything checks out with the MLE
- * now check to see if there is a lockres */
-check_lockres:
- res = __dlm_lookup_lock(dlm, &lockname);
- if (res) {
- spin_lock(&res->spinlock);
- if (!mle) {
- if (res->owner != assert->node_idx) {
- dlmprintk("EEEEeeEEeeEEEK! assert_master from "
- "%u, but current owner is %u!\n",
- assert->node_idx, res->owner);
- BUG();
- }
- } else {
- if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
- dlmprintk("EEEEEEEEEEEEEEEEEK!!! got "
- "assert_master from node %u, but %u "
- "is the owner!\n", assert->node_idx,
- res->owner);
- dlmprintk0("goodnite!\n");
- BUG();
- }
- if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
- dlmprintk("bug! got assert from %u, but lock "
- "with no owner should be "
- "in-progress!\n", assert->node_idx);
- BUG();
- }
- }
- spin_unlock(&res->spinlock);
- }
- spin_unlock(&dlm->spinlock);
-
- // dlmprintk("woo! got an assert_master from node %u!\n",
- // assert->node_idx);
- if (mle) {
- spin_lock(&mle->spinlock);
- mle->master = assert->node_idx;
- atomic_set(&mle->woken, 1);
- wake_up(&mle->wq);
- spin_unlock(&mle->spinlock);
-
- /* if this is the last put, it will be removed from the list */
- dlm_put_mle(mle);
- }
- return 0;
-}
-
-
-static int dlm_do_master_request(dlm_master_list_entry *mle, int to)
-{
- struct inode *inode = NULL;
- dlm_ctxt *dlm = mle->dlm;
- dlm_master_request request;
- int ret, response=0;
-
- memset(&request, 0, sizeof(request));
- request.node_idx = dlm->group_index;
- if (mle->type == DLM_MLE_BLOCK) {
- request.namelen = mle->u.name.len;
- strncpy(request.name, mle->u.name.name, request.namelen);
- } else {
- request.namelen = mle->u.res->lockname.len;
- strncpy(request.name, mle->u.res->lockname.name,
- request.namelen);
- }
-
- ret = -EINVAL;
- inode = nm_get_group_node_by_index(dlm->group, to);
- if (inode) {
- dlm_master_request_to_net(&request);
- ret = net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key,
- &request, sizeof(request),
- inode, &response);
- iput(inode);
- if (ret >= 0) {
- spin_lock(&mle->spinlock);
- switch (response) {
- case DLM_MASTER_RESP_YES:
- set_bit(to, mle->response_map);
- // dlmprintk("woot! node %u is the "
- // "master!\n", to);
- mle->master = to;
- break;
- case DLM_MASTER_RESP_NO:
- // dlmprintk("node %u is not the "
- // "master, not in-progress\n", to);
- set_bit(to, mle->response_map);
- break;
- case DLM_MASTER_RESP_MAYBE:
- // dlmprintk("node %u is not the "
- // "master, but IS in-progress\n", to);
- set_bit(to, mle->response_map);
- set_bit(to, mle->maybe_map);
- break;
- case DLM_MASTER_RESP_ERROR:
- dlmprintk("node %u hit an -ENOMEM! "
- "try everything again\n", to);
- mle->error = 1;
- break;
- default:
- dlmprintk("bad response! %u\n",
- response);
- ret = -EINVAL;
- break;
- }
- spin_unlock(&mle->spinlock);
- } else {
- dlmprintk("net_send_message returned %d!\n", ret);
- }
- } else {
- dlmprintk("nm_get_group_node_by_index failed to find inode "
- "for node %d!\n", to);
- }
- return ret;
-}
-
-static int dlm_do_master_request_resp(dlm_ctxt *dlm, struct qstr *name,
- int response, int to)
-{
- struct inode *inode = NULL;
- dlm_master_request_resp resp;
- int ret;
-
- memset(&resp, 0, sizeof(resp));
- resp.node_idx = dlm->group_index;
- resp.response = response;
- resp.namelen = name->len;
- strncpy(resp.name, name->name, name->len);
-
- inode = nm_get_group_node_by_index(dlm->group, to);
- if (!inode)
- return -EINVAL;
-
- dlm_master_request_resp_to_net(&resp);
- ret = net_send_message(DLM_MASTER_REQUEST_RESP_MSG, dlm->key,
- &resp, sizeof(resp), inode, NULL);
- iput(inode);
- return ret;
-}
-
-/*
- * NOTE: this can be used for debugging
- * can periodically run all locks owned by this node
- * and re-assert across the cluster...
- */
-static int dlm_do_assert_master(dlm_master_list_entry *mle)
-{
- struct inode *inode = NULL;
- dlm_ctxt *dlm = mle->dlm;
- dlm_assert_master assert;
- int to, start = 0, ret = 0, tmpret;
-
- while (1) {
- to = find_next_bit (mle->vote_map, NM_MAX_NODES, start);
- if (to >= NM_MAX_NODES) {
- // dlmprintk0("no more nodes\n");
- break;
- }
- // dlmprintk("sending assert master to %d\n", to);
-
- memset(&assert, 0, sizeof(assert));
- assert.node_idx = dlm->group_index;
- if (mle->type == DLM_MLE_BLOCK) {
- assert.namelen = mle->u.name.len;
- strncpy(assert.name, mle->u.name.name, assert.namelen);
- } else {
- assert.namelen = mle->u.res->lockname.len;
- strncpy(assert.name, mle->u.res->lockname.name,
- assert.namelen);
- }
-
- inode = nm_get_group_node_by_index(dlm->group, to);
- if (!inode) {
- tmpret = -EINVAL;
- dlmprintk("could not get nm info for node %d! "
- "need to retry this whole thing\n", to);
- ret = tmpret;
- break;
- }
-
- dlm_assert_master_to_net(&assert);
- tmpret = net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key,
- &assert, sizeof(assert), inode, NULL);
- iput(inode);
-
- if (tmpret < 0) {
- // TODO
- // dlmprintk("assert_master returned %d!\n", tmpret);
- ret = tmpret;
- break;
- }
- start = to+1;
- }
-
- return ret;
-}
-
-
-
-
-
-
-static void dlm_mle_node_down(struct inode *group, struct inode *node,
- int idx, void *data)
-{
- //int ret;
- //struct inode *node = ptr2;
-
- dlm_master_list_entry *mle;
- dlm_ctxt *dlm;
-
- mle = data;
- if (!mle) {
- dlmprintk0("eek! NULL mle!\n");
- return;
- }
- if (!mle->dlm) {
- dlmprintk0("eek! NULL dlm\n");
- return;
- }
- dlm = mle->dlm;
- if (dlm->group != group)
- return;
-
- spin_lock(&mle->spinlock);
-
- if (!test_bit(idx, mle->node_map))
- dlmprintk("node %u already removed from nodemap!\n", idx);
- else
- clear_bit(idx, mle->node_map);
-
-#if 0
- if (test_bit(idx, mle->recovery_map))
- dlmprintk("node %u already added to recovery map!\n", idx);
- else
- set_bit(idx, mle->recovery_map);
-#endif
- spin_unlock(&mle->spinlock);
-}
-
-static void dlm_mle_node_up(struct inode *group, struct inode *node,
- int idx, void *data)
-{
- //struct inode *node = ptr2;
- dlm_master_list_entry *mle;
- dlm_ctxt *dlm;
-
- mle = data;
- if (!mle) {
- dlmprintk0("eek! NULL mle!\n");
- return;
- }
- if (!mle->dlm) {
- dlmprintk0("eek! NULL dlm\n");
- return;
- }
- dlm = mle->dlm;
- if (dlm->group != group)
- return;
-
- spin_lock(&mle->spinlock);
-
-#if 0
- if (test_bit(idx, mle->recovery_map))
- dlmprintk("BUG!!! node up message on node "
- "in recovery (%u)!!!\n", idx);
- else
-#endif
- {
- if (test_bit(idx, mle->node_map))
- dlmprintk("node %u already in node map!!!\n", idx);
- else
- set_bit(idx, mle->node_map);
- }
-
- spin_unlock(&mle->spinlock);
-}
Copied: trunk/fs/ocfs2/cluster/dlmmaster.c (from rev 1811, trunk/cluster/dlmmaster.c)
Deleted: trunk/fs/ocfs2/cluster/dlmmod.c
===================================================================
--- trunk/cluster/dlmmod.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmmod.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,802 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmmod.c
- *
- * standalone DLM module
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-MODULE_LICENSE ("GPL");
-MODULE_AUTHOR("Oracle Corporation");
-//MODULE_DESCRIPTION("Oracle DLM");
-
-
-/*
- *
- * spinlock lock ordering: if multiple locks are needed, obey this ordering:
- * dlm_domain_lock -> dlm_ctxt -> dlm_lock_resource -> dlm_lock
- *
- */
-
-
-static int __init dlm_driver_entry (void);
-static int dlm_read_params(void);
-static void __exit dlm_driver_exit (void);
-
-
-
-LIST_HEAD(dlm_domains);
-spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED;
-u16 dlm_global_index = NM_MAX_NODES;
-static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED;
-static u64 dlm_next_cookie = 1;
-
-
-
-
-/* ----------------------------------------------------------------- */
-
-extern spinlock_t dlm_master_lock;
-extern struct list_head dlm_master_list;
-/* ----------------------------------------------------------------- */
-
-
-
-
-/*
- * dlm_driver_entry()
- *
- * Driver entry point. Called on insmod.
- */
-static int __init dlm_driver_entry (void)
-{
- int status;
-
-
- dlmprintk0("Loaded dlm Driver module\n");
- status = dlm_read_params();
- if (status < 0)
- return -1;
-
- dlm_global_index = nm_this_node(NULL);
- if (dlm_global_index == NM_MAX_NODES)
- return -1;
-
- return 0;
-} /* dlm_driver_entry */
-
-/*
- * dlm_read_params()
- *
- * Read insmod params
- */
-static int dlm_read_params(void)
-{
- int status = 0;
- return status;
-} /* dlm_read_params */
-
-
-/*
- * dlm_driver_exit()
- *
- * Called on rmmod
- */
-static void __exit dlm_driver_exit (void)
-{
- dlmprintk0("Unloaded dlm Driver module\n");
- return;
-} /* dlm_driver_exit */
-
-
-/* fetch next node-local (u8 nodenum + u56 cookie) into u64 */
-static inline void dlm_get_next_cookie(u16 node_num, u64 *cookie)
-{
- /* why did I make node_num 16 bit to begin with? */
- u64 tmpnode = (u8)(node_num & (u16)0x00ff);
-
- /* shift single byte of node num into top 8 bits */
- tmpnode <<= 56;
-
- spin_lock(&dlm_cookie_lock);
- *cookie = (dlm_next_cookie | tmpnode);
- if (++dlm_next_cookie & 0xff00000000000000ull) {
- dlmprintk0("eek! this node's cookie will now wrap!\n");
- dlm_next_cookie = 1;
- }
- spin_unlock(&dlm_cookie_lock);
-}
-
-
-dlm_status dlmlock(dlm_ctxt *dlm, int mode, dlm_lockstatus *lksb, int flags,
- char *name, dlm_astlockfunc_t *ast, void *data,
- dlm_bastlockfunc_t *bast)
-{
- dlm_status status;
- dlm_lock_resource *res;
- dlm_lock *lock = NULL;
- char *buf = NULL;
- int convert = 0, recovery = 0;
- struct qstr q;
-
- if (!lksb)
- return DLM_BADARGS;
-
- status = DLM_BADPARAM;
- if (mode != LKM_EXMODE && mode != LKM_PRMODE && mode != LKM_NLMODE)
- goto error_status;
-
- if (flags & ~LKM_VALID_FLAGS)
- goto error_status;
-
- convert = (flags & LKM_CONVERT);
- recovery = (flags & LKM_RECOVERY);
-
- if (recovery && (!dlm_is_recovery_lock(name, strlen(name)) ||
- convert) ) {
- goto error_status;
- }
- if (convert && (flags & LKM_LOCAL)) {
- dlmprintk0("strange LOCAL convert request!\n");
- goto error_status;
- }
-
- if (convert) {
- /* CONVERT request */
-
- /* if converting, must pass in a valid dlm_lock */
- if (!lksb->lockid || !lksb->lockid->lockres)
- goto error_status;
- lock = lksb->lockid;
-
- /* XXX: for ocfs2 purposes, the ast/bast/astdata/lksb are
- * static after the original lock call. convert requests will
- * ensure that everything is the same, or return DLM_BADARGS.
- * this means that DLM_DENIED_NOASTS will never be returned.
- */
-#warning differs from spec here!
-
- if (lock->lksb != lksb || lock->ast != ast ||
- lock->bast != bast || lock->astdata != data) {
- status = DLM_BADARGS;
- dlmprintk("ERROR new args: lksb=%p, ast=%p, bast=%p, "
- "astdata=%p\n", lksb, ast, bast, data);
- dlmprintk(" orig args: lksb=%p, ast=%p, bast=%p, "
- "astdata=%p\n", lock->lksb, lock->ast,
- lock->bast, lock->astdata);
- goto error_status;
- }
- res = lock->lockres;
- down_read(&dlm->recovery_sem);
-
- if (res->owner == dlm->group_index)
- status = dlmconvert_master(dlm, res, lock, flags, mode);
- else
- status = dlmconvert_remote(dlm, res, lock, flags, mode);
-
- } else {
- /* LOCK request */
- status = DLM_BADARGS;
- if (!name)
- goto error;
-
- status = DLM_IVBUFLEN;
- q.len = strlen(name);
- if (q.len > DLM_LOCKID_NAME_MAX)
- goto error;
-
- /* take care of all allocs before any locking */
- status = DLM_SYSERR;
- buf = kmalloc(q.len+1, GFP_KERNEL); /* lockres name */
- if (!buf)
- goto error;
-
- memcpy(buf, name, q.len);
- buf[q.len] = 0;
- q.name = buf;
- q.hash = full_name_hash(q.name, q.len);
-
- lock = kmalloc(sizeof(dlm_lock), GFP_KERNEL); /* dlm_lock */
- if (!lock)
- goto error;
-
- lksb->lockid = lock;
-
- if (!recovery)
- down_read(&dlm->recovery_sem);
-
- /* find or create the lock resource */
- res = dlm_get_lock_resource(dlm, &q, flags);
- if (!res) {
- status = DLM_IVLOCKID;
- goto up_error;
- }
-
- dlmprintk("type=%d\n", mode);
- dlmprintk("creating lock: lock=%p res=%p\n", lock, res);
-
- memset(lock, 0, sizeof(dlm_lock));
- INIT_LIST_HEAD(&lock->list);
- INIT_LIST_HEAD(&lock->ast_list);
- spin_lock_init(&lock->spinlock);
- lock->lockres = res;
- lock->type = mode;
- lock->convert_type = LKM_IVMODE;
- lock->highest_blocked = LKM_IVMODE;
- lock->node = dlm->group_index;
- lock->ast = ast;
- lock->bast = bast;
- lock->astdata = data;
- lock->lksb = lksb;
-
- dlm_get_next_cookie(lock->node, &lock->cookie);
-
- if (res->owner == dlm->group_index)
- status = dlmlock_master(dlm, res, lock, flags);
- else
- status = dlmlock_remote(dlm, res, lock, flags);
-
- if (status != DLM_NORMAL)
- goto up_error;
- }
-
- if (!recovery)
- up_read(&dlm->recovery_sem);
- return status;
-
-up_error:
- if (!recovery)
- up_read(&dlm->recovery_sem);
-error:
- if (buf)
- kfree(buf);
- if (lock && !convert) {
- kfree(lock);
- lksb->lockid = NULL;
- }
-
-error_status:
- // this is kind of unnecessary
- lksb->status = status;
- return status;
-}
-EXPORT_SYMBOL(dlmlock);
-
-
-/* there seems to be no point in doing this async
- * since (even for the remote case) there is really
- * no work to queue up... so just do it and fire the
- * unlockast by hand when done... */
-dlm_status dlmunlock(dlm_ctxt *dlm, dlm_lockstatus *lksb, int flags,
- dlm_astunlockfunc_t *unlockast, void *data)
-{
- dlm_status status;
- dlm_lock_resource *res;
- dlm_lock *lock = NULL;
- int call_ast = 0;
-
- dlmprintk0("\n");
-
- if (!lksb)
- return DLM_BADARGS;
-
- if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK))
- return DLM_BADPARAM;
-
- if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) {
- dlmprintk0("VALBLK given with CANCEL: ignoring VALBLK\n");
- flags &= ~LKM_VALBLK;
- }
-
- if (!lksb->lockid || !lksb->lockid->lockres)
- return DLM_BADPARAM;
-
- lock = lksb->lockid;
- res = lock->lockres;
-
- DLM_ASSERT(lock);
- DLM_ASSERT(res);
- dlmprintk("lock=%p res=%p\n", lock, res);
-
- if (res->owner == dlm->group_index) {
- status = dlmunlock_master(dlm, res, lock, lksb, flags,
- &call_ast);
- dlmprintk("done calling dlmunlock_master: returned %d, "
- "call_ast is %d\n", status, call_ast);
- } else {
- status = dlmunlock_remote(dlm, res, lock, lksb, flags,
- &call_ast);
- dlmprintk("done calling dlmunlock_remote: returned %d, "
- "call_ast is %d\n", status, call_ast);
- }
-
- if (call_ast) {
- dlmprintk("calling unlockast(%p, %d)\n",
- data, lksb->status);
- (*unlockast)(data, lksb->status);
- }
- dlmprintk("returning status=%d!\n", status);
- return status;
-}
-EXPORT_SYMBOL(dlmunlock);
-
-
-static dlm_ctxt * __dlm_lookup_domain(char *domain)
-{
- dlm_ctxt *tmp = NULL;
- struct list_head *iter;
-
- list_for_each(iter, &dlm_domains) {
- tmp = list_entry (iter, dlm_ctxt, list);
- if (strncmp(tmp->name, domain, NM_MAX_NAME_LEN)==0)
- break;
- tmp = NULL;
- }
-
- return tmp;
-}
-
-dlm_ctxt * dlm_lookup_domain(char *domain)
-{
- dlm_ctxt *tmp = NULL;
- spin_lock(&dlm_domain_lock);
- tmp = __dlm_lookup_domain(domain);
- spin_unlock(&dlm_domain_lock);
- return tmp;
-}
-
-dlm_lock_resource * __dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname)
-{
- struct list_head *iter;
- dlm_lock_resource *tmpres=NULL;
- struct list_head *bucket;
-
- dlmprintk0("\n");
-
- bucket = &(dlm->resources[lockname->hash & DLM_HASH_MASK]);
-
- /* check for pre-existing lock */
- list_for_each(iter, bucket) {
- tmpres = list_entry(iter, dlm_lock_resource, list);
- if (tmpres->lockname.len == lockname->len &&
- strncmp(tmpres->lockname.name, lockname->name,
- lockname->len) == 0)
- break;
- tmpres = NULL;
- }
- return tmpres;
-}
-
-dlm_lock_resource * dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname)
-{
- dlm_lock_resource *res;
- spin_lock(&dlm->spinlock);
- res = __dlm_lookup_lock(dlm, lockname);
- spin_unlock(&dlm->spinlock);
- return res;
-}
-
-
-
-/*
- * dlm_register_domain: one-time setup per "domain"
- */
-dlm_ctxt * dlm_register_domain(char *domain, char *group_name, u32 key)
-{
- dlm_ctxt *tmp = NULL, *dlm = NULL;
- struct inode *group = NULL;
- int tmpret, i;
-
- if (strlen(domain) > NM_MAX_NAME_LEN) {
- dlmprintk0("domain name length too long\n");
- goto leave;
- }
-
- group = nm_get_group_by_name(group_name);
- if (!group) {
- dlmprintk("no nm group %s for domain %s!\n",
- group_name, domain);
- goto leave;
- }
-
- /*
- * TODO: should i do some type of dlm-group-join business here?
- * I need to have new nodes communicate with other dlm nodes to
- * wait until their master lists are empty before allowing me to
- * join. does this belong here? or in hb?
- * seems like stuff that heartbeat shouldn't care about, cuz we
- * would actually be preventing a node that is "UP" from being
- * part of the dlm group.
- */
- dlm = dlm_lookup_domain(domain);
- if (dlm) {
- /* found a pre-existing domain */
- goto leave;
- }
-
- dlm = kmalloc(sizeof(dlm_ctxt), GFP_KERNEL);
- if (dlm == NULL) {
- dlmprintk0("could not allocate dlm_ctxt\n");
- goto leave;
- }
- memset(dlm, 0, sizeof(dlm_ctxt));
- dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL);
- if (dlm->name == NULL) {
- kfree(dlm);
- dlm = NULL;
- dlmprintk0("could not allocate dlm domain name\n");
- goto leave;
- }
- dlm->resources = (struct list_head *) __get_free_page(GFP_KERNEL);
- if (!dlm->resources) {
- kfree(dlm->name);
- kfree(dlm);
- dlm = NULL;
- dlmprintk0("could not allocate dlm hash\n");
- goto leave;
- }
- memset(dlm->resources, 0, PAGE_SIZE);
-
- for (i=0; i<DLM_HASH_SIZE; i++)
- INIT_LIST_HEAD(&dlm->resources[i]);
-
- strcpy(dlm->name, domain);
- spin_lock_init(&dlm->spinlock);
- INIT_LIST_HEAD(&dlm->list);
- INIT_LIST_HEAD(&dlm->dirty_list);
- INIT_LIST_HEAD(&dlm->reco.resources);
- INIT_LIST_HEAD(&dlm->reco.received);
- util_thread_info_init(&dlm->thread);
- util_thread_info_init(&dlm->reco.thread);
- init_rwsem(&dlm->recovery_sem);
- dlm->group = group;
- dlm->group_index = nm_this_node(group);
- dlm->key = key;
- dlm->reco.new_master = NM_INVALID_SLOT_NUM;
- dlm->reco.dead_node = NM_INVALID_SLOT_NUM;
- dlm->reco.sending_node = NM_INVALID_SLOT_NUM;
- dlm->reco.next_seq = 0;
-
- spin_lock(&dlm_domain_lock);
- tmp = __dlm_lookup_domain(domain);
- if (tmp) {
- spin_unlock(&dlm_domain_lock);
- /* found a pre-existing domain */
- kfree(dlm->name);
- kfree(dlm);
- dlm = NULL;
- goto leave;
- }
-
- /* add the new domain */
- list_add_tail(&dlm->list, &dlm_domains);
- spin_unlock(&dlm_domain_lock);
-
- tmpret = hb_register_callback(HB_NODE_DOWN_CB, dlm_hb_node_down_cb, dlm,
- DLM_HB_NODE_DOWN_PRI);
- if (tmpret)
- goto error;
- tmpret = hb_register_callback(HB_NODE_UP_CB, dlm_hb_node_up_cb, dlm,
- DLM_HB_NODE_UP_PRI);
- if (tmpret)
- goto error;
-
- /* TODO: need to use hb_fill_node_map to fill a temporary votemap
- * then communicate with each of these nodes that I want to come up
- * FOR THIS DLM. there may be many nodes in this group heartbeating
- * but they may not care about this particular dlm instance. once
- * everyone has come back with a response that i have been added or
- * that they are not a member I can put together the REAL node map
- * for this dlm in dlm->node_map */
- /* TODO: I guess we can fill this here as a superset of possible nodes
- * so that the hb_callbacks above have something to work on in the
- * meantime, then trim out the nodes that are not part of this dlm
- * once we know */
- /* TODO: I may need to register a special net handler on insmod of dlm.o
- * with a key of 0 so that I can respond to requests even if I am not
- * part of a dlm group. this would still leave a gap in time between
- * the start of heartbeating and the insmod dlm.o, unless I change the
- * module loading stuff in clusterbo to include dlm.o (which would work
- * fine) */
-#warning WRONG WRONG WRONG
- tmpret = hb_fill_node_map(group, dlm->node_map, NM_MAX_NODES);
- if (tmpret)
- goto error;
-
- dlmprintk("hb_fill_node_map returned node map:\n");
- BUG_ON(ARRAY_SIZE(dlm->node_map) & 3); /* better be mult of 4 :) */
- for(i = 0; i < ARRAY_SIZE(dlm->node_map); i += 4)
- dlmprintk("%0lx%0lx%0lx%0lx\n",
- dlm->node_map[i], dlm->node_map[i + 1],
- dlm->node_map[i + 2], dlm->node_map[i + 3]);
-
-#if 0
- tmpret = net_register_handler("reco-request",
- DLM_NET_RECOVERY_REQUEST_MSG_TYPE,
- key, sizeof(dlm_reco_request),
- dlm_recovery_request_handler, dlm);
- if (tmpret)
- goto error;
- tmpret = net_register_handler("reco-lock-arr-req",
- DLM_NET_RECOVERY_LOCK_ARR_REQ_MSG_TYPE,
- key, sizeof(dlm_reco_lock_arr_req),
- dlm_recovery_lock_arr_req_handler, dlm);
- if (tmpret)
- goto error;
- tmpret = net_register_handler("reco-response",
- DLM_NET_RECOVERY_RESPONSE_MSG_TYPE,
- key, sizeof(dlm_reco_response),
- dlm_recovery_response_handler, dlm);
- if (tmpret)
- goto error;
-#endif
-
- tmpret = net_register_handler(DLM_MASTER_REQUEST_RESP_MSG, key, 0,
- sizeof(dlm_master_request_resp),
- dlm_master_request_resp_handler,
- dlm);
- if (tmpret)
- goto error;
-
- tmpret = net_register_handler(DLM_MASTER_REQUEST_MSG, key, 0,
- sizeof(dlm_master_request),
- dlm_master_request_handler,
- dlm);
-
- if (tmpret)
- goto error;
-
- tmpret = net_register_handler(DLM_ASSERT_MASTER_MSG, key, 0,
- sizeof(dlm_assert_master),
- dlm_assert_master_handler,
- dlm);
- if (tmpret)
- goto error;
- tmpret = net_register_handler(DLM_CREATE_LOCK_MSG, key, 0,
- sizeof(dlm_create_lock),
- dlm_create_lock_handler,
- dlm);
- if (tmpret)
- goto error;
- tmpret = net_register_handler(DLM_CONVERT_LOCK_MSG, key,
- NET_HND_VAR_LEN,
- DLM_CONVERT_LOCK_MAX_LEN,
- dlm_convert_lock_handler,
- dlm);
- if (tmpret)
- goto error;
-
- tmpret = net_register_handler(DLM_UNLOCK_LOCK_MSG, key,
- NET_HND_VAR_LEN,
- DLM_UNLOCK_LOCK_MAX_LEN,
- dlm_unlock_lock_handler,
- dlm);
- if (tmpret)
- goto error;
-
- tmpret = net_register_handler(DLM_PROXY_AST_MSG, key,
- NET_HND_VAR_LEN,
- DLM_PROXY_AST_MAX_LEN,
- dlm_proxy_ast_handler,
- dlm);
- if (tmpret)
- goto error;
-
- tmpret = dlm_launch_thread(dlm);
- if (tmpret == 0)
- goto leave;
-
-error:
- hb_unregister_callback(HB_NODE_UP_CB, dlm_hb_node_up_cb, dlm);
- hb_unregister_callback(HB_NODE_DOWN_CB, dlm_hb_node_down_cb, dlm);
- spin_lock(&dlm_domain_lock);
- list_del(&dlm->list);
- spin_unlock(&dlm_domain_lock);
- free_page((unsigned long)dlm->resources);
- kfree(dlm->name);
- kfree(dlm);
- dlm = NULL;
-
-leave:
- if (!dlm && group)
- iput(group);
- return dlm;
-}
-EXPORT_SYMBOL(dlm_register_domain);
-
-void dlm_unregister_domain(dlm_ctxt *dlm)
-{
- // fill me in please
-}
-EXPORT_SYMBOL(dlm_unregister_domain);
-
-void dlm_init_lockres(dlm_lock_resource *res, struct qstr *lockname)
-{
- memset(res, 0, sizeof(dlm_lock_resource));
- res->lockname.name = lockname->name;
- res->lockname.len = lockname->len;
- res->lockname.hash = lockname->hash;
- init_waitqueue_head(&res->wq);
- spin_lock_init(&res->spinlock);
- INIT_LIST_HEAD(&res->list);
- INIT_LIST_HEAD(&res->granted);
- INIT_LIST_HEAD(&res->converting);
- INIT_LIST_HEAD(&res->blocked);
- INIT_LIST_HEAD(&res->dirty);
- INIT_LIST_HEAD(&res->recovering);
-
- res->owner = DLM_LOCK_RES_OWNER_UNKNOWN;
- res->state |= DLM_LOCK_RES_IN_PROGRESS;
-}
-
-
-
-
-/* will exit holding res->spinlock, but may drop in function */
-void dlm_wait_on_lockres(dlm_lock_resource *res)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&res->wq, &wait);
-repeat:
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- spin_lock(&res->spinlock);
- if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
- spin_unlock(&res->spinlock);
- schedule();
- goto repeat;
- }
- remove_wait_queue(&res->wq, &wait);
- current->state = TASK_RUNNING;
-}
-
-/* will exit holding res->spinlock, but may drop in function */
-void __dlm_wait_on_lockres(dlm_lock_resource *res)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&res->wq, &wait);
-repeat:
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
- spin_unlock(&res->spinlock);
- schedule();
- spin_lock(&res->spinlock);
- goto repeat;
- }
- remove_wait_queue(&res->wq, &wait);
- current->state = TASK_RUNNING;
-}
-
-
-
-
-
-void dlm_dump_everything(void)
-{
- dlm_ctxt *dlm;
- struct list_head *iter;
-
- dlmprintk("dumping ALL dlm state for node %s\n",
- system_utsname.nodename);
- spin_lock(&dlm_domain_lock);
- list_for_each(iter, &dlm_domains) {
- dlm = list_entry (iter, dlm_ctxt, list);
- dlm_dump_dlm(dlm);
- }
- spin_unlock(&dlm_domain_lock);
-}
-
-void dlm_dump_dlm(dlm_ctxt *dlm)
-{
- dlm_lock_resource *res;
- dlm_lock *lock;
- struct list_head *iter, *iter2;
- struct list_head *bucket;
- int i;
-
- dlmprintk("dlm_ctxt: %s, group=%u, key=%u\n",
- dlm->name, dlm->group_index, dlm->key);
- dlmprintk0("some bug here... should not have to check for this...\n");
- if (!dlm || !dlm->name) {
- dlmprintk("wtf... dlm=%p\n", dlm);
- return;
- }
-
- spin_lock(&dlm->spinlock);
- for (i=0; i<DLM_HASH_SIZE; i++) {
- bucket = &(dlm->resources[i]);
- list_for_each(iter, bucket) {
- res = list_entry(iter, dlm_lock_resource, list);
- dlmprintk("lockres: %*s, owner=%u, state=%u\n",
- res->lockname.len, res->lockname.name,
- res->owner, res->state);
- spin_lock(&res->spinlock);
- dlmprintk0(" granted queue: \n");
- list_for_each(iter2, &res->granted) {
- lock = list_entry(iter2, dlm_lock, list);
- spin_lock(&lock->spinlock);
- dlmprintk(" type=%d, conv=%d, node=%u, "
- "cookie=%llu\n", lock->type,
- lock->convert_type, lock->node,
- lock->cookie);
- spin_unlock(&lock->spinlock);
- }
- dlmprintk0(" converting queue: \n");
- list_for_each(iter2, &res->converting) {
- lock = list_entry(iter2, dlm_lock, list);
- spin_lock(&lock->spinlock);
- dlmprintk(" type=%d, conv=%d, node=%u, "
- "cookie=%llu\n", lock->type,
- lock->convert_type, lock->node,
- lock->cookie);
- spin_unlock(&lock->spinlock);
- }
- dlmprintk0(" blocked queue: \n");
- list_for_each(iter2, &res->blocked) {
- lock = list_entry(iter2, dlm_lock, list);
- spin_lock(&lock->spinlock);
- dlmprintk(" type=%d, conv=%d, node=%u, "
- "cookie=%llu\n", lock->type,
- lock->convert_type, lock->node,
- lock->cookie);
- spin_unlock(&lock->spinlock);
- }
- spin_unlock(&res->spinlock);
- }
- }
- spin_unlock(&dlm->spinlock);
-}
-
-module_init (dlm_driver_entry);
-module_exit (dlm_driver_exit);
Copied: trunk/fs/ocfs2/cluster/dlmmod.c (from rev 1811, trunk/cluster/dlmmod.c)
Deleted: trunk/fs/ocfs2/cluster/dlmmod.h
===================================================================
--- trunk/cluster/dlmmod.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmmod.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,677 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmmod.h
- *
- * Function prototypes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_DLMMOD_H
-#define CLUSTER_DLMMOD_H
-
-
-
-#if 0
-#define dlmprintk(x, arg...)
-#define dlmprintk0(x)
-#else
-#define dlmprintk(x, arg...) printk("(dlm:%d)(%s:%d) " x, current->pid, __FUNCTION__, __LINE__, ##arg)
-#define dlmprintk0(x) printk("(dlm:%d)(%s:%d) " x, current->pid, __FUNCTION__, __LINE__)
-#endif
-
-
-
-
-#define DLM_HB_NODE_DOWN_PRI (0xf000000)
-#define DLM_HB_NODE_UP_PRI (0x8000000)
-
-#define DLM_LVB_LEN 64
-#define DLM_LOCKID_NAME_MAX 32
-
-#define DLM_DOMAIN_NAME_MAX_LEN 255
-#define DLM_LOCK_RES_OWNER_UNKNOWN NM_MAX_NODES
-#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
-#define DLM_THREAD_MS 200 // flush at least every 200 ms
-
-#define DLM_HASH_BITS 7
-#define DLM_HASH_SIZE (1 << DLM_HASH_BITS)
-#define DLM_HASH_MASK (DLM_HASH_SIZE - 1)
-
-typedef enum _dlm_ast_type {
- DLM_AST = 0,
- DLM_BAST,
- DLM_ASTUNLOCK
-} dlm_ast_type;
-
-
-#define LKM_IVMODE (-1) /* invalid mode */
-#define LKM_NLMODE 0 /* null lock */
-#define LKM_CRMODE 1 /* concurrent read */ /* unsupported */
-#define LKM_CWMODE 2 /* concurrent write */ /* unsupported */
-#define LKM_PRMODE 3 /* protected read */
-#define LKM_PWMODE 4 /* protected write */ /* unsupported */
-#define LKM_EXMODE 5 /* exclusive */
-#define LKM_MAXMODE 5
-#define LKM_MODEMASK 0xff
-
-
-/* reserved: flags used by the "real" dlm, only a few are supported by this dlm */
-#define LKM_ORPHAN 0x00000010 /* this lock is orphanable */ /* unsupported */
-#define LKM_PARENTABLE 0x00000020 /* this lock was orphaned */ /* unsupported */
-#define LKM_BLOCK 0x00000040 /* blocking lock request */ /* unsupported */
-#define LKM_LOCAL 0x00000080 /* local lock request */
-#define LKM_VALBLK 0x00000100 /* lock value block request */
-#define LKM_NOQUEUE 0x00000200 /* non blocking request */
-#define LKM_CONVERT 0x00000400 /* conversion request */
-#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock */ /* unsupported */
-#define LKM_UNLOCK 0x00001000 /* deallocate this lock */
-#define LKM_CANCEL 0x00002000 /* cancel conversion request */
-#define LKM_DEQALL 0x00004000 /* remove all locks held by proc */ /* unsupported */
-#define LKM_INVVALBLK 0x00008000 /* invalidate lock value block */
-#define LKM_SYNCSTS 0x00010000 /* return synchronous status if poss */ /* unsupported */
-#define LKM_TIMEOUT 0x00020000 /* lock request contains timeout */ /* unsupported */
-#define LKM_SNGLDLCK 0x00040000 /* request can self-deadlock */ /* unsupported */
-#define LKM_FINDLOCAL 0x00080000 /* find local lock request */ /* unsupported */
-#define LKM_PROC_OWNED 0x00100000 /* owned by process, not group */ /* unsupported */
-#define LKM_XID 0x00200000 /* use transaction id for deadlock */ /* unsupported */
-#define LKM_XID_CONFLICT 0x00400000 /* do not allow lock inheritance */ /* unsupported */
-#define LKM_FORCE 0x00800000 /* force unlock flag */
-#define LKM_REVVALBLK 0x01000000 /* temporary solution: re-validate lock value block */ /* unsupported */
-/* unused */
-#define LKM_UNUSED1 0x00000001 /* unused */
-#define LKM_UNUSED2 0x00000002 /* unused */
-#define LKM_UNUSED3 0x00000004 /* unused */
-#define LKM_UNUSED4 0x00000008 /* unused */
-#define LKM_UNUSED5 0x02000000 /* unused */
-#define LKM_UNUSED6 0x04000000 /* unused */
-#define LKM_UNUSED7 0x08000000 /* unused */
-#define LKM_UNUSED8 0x10000000 /* unused */
-/* ocfs2 extensions: internal only; should never be used by caller */
-#define LKM_PUT_LVB 0x20000000 /* extension: lvb is being passed, should be applied to lockres */
-#define LKM_GET_LVB 0x40000000 /* extension: lvb should be copied from lockres when lock granted */
-#define LKM_RECOVERY 0x80000000 /* extension: flag for recovery lock, used to avoid recovery rwsem */
-
-#define LKM_VALID_FLAGS (LKM_VALBLK | LKM_CONVERT | LKM_UNLOCK | \
- LKM_CANCEL | LKM_INVVALBLK | LKM_FORCE | \
- LKM_RECOVERY | LKM_LOCAL | LKM_NOQUEUE)
-
-#define DLM_RECOVERY_LOCK_NAME "$RECOVERY"
-#define DLM_RECOVERY_LOCK_NAME_LEN 9
-
-static inline int dlm_is_recovery_lock(char *lock_name, int name_len)
-{
- if (name_len == DLM_RECOVERY_LOCK_NAME_LEN &&
- strncmp(lock_name, DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN)==0)
- return 1;
- return 0;
-}
-
-typedef enum _dlm_status {
- DLM_NORMAL = 0, /* 0: request in progress */
- DLM_GRANTED, /* 1: request granted */
- DLM_DENIED, /* 2: request denied */
- DLM_DENIED_NOLOCKS, /* 3: request denied, out of system resources */
- DLM_WORKING, /* 4: async request in progress */
- DLM_BLOCKED, /* 5: lock request blocked */
- DLM_BLOCKED_ORPHAN, /* 6: lock request blocked by a orphan lock*/
- DLM_DENIED_GRACE_PERIOD, /* 7: topological change in progress */
- DLM_SYSERR, /* 8: system error */
- DLM_NOSUPPORT, /* 9: unsupported */
- DLM_CANCELGRANT, /* 10: can't cancel convert: already granted */
- DLM_IVLOCKID, /* 11: bad lockid */
- DLM_SYNC, /* 12: synchronous request granted */
- DLM_BADTYPE, /* 13: bad resource type */
- DLM_BADRESOURCE, /* 14: bad resource handle */
- DLM_MAXHANDLES, /* 15: no more resource handles */
- DLM_NOCLINFO, /* 16: can't contact cluster manager */
- DLM_NOLOCKMGR, /* 17: can't contact lock manager */
- DLM_NOPURGED, /* 18: can't contact purge daemon */
- DLM_BADARGS, /* 19: bad api args */
- DLM_VOID, /* 20: no status */
- DLM_NOTQUEUED, /* 21: NOQUEUE was specified and request failed */
- DLM_IVBUFLEN, /* 22: invalid resource name length */
- DLM_CVTUNGRANT, /* 23: attempted to convert ungranted lock */
- DLM_BADPARAM, /* 24: invalid lock mode specified */
- DLM_VALNOTVALID, /* 25: value block has been invalidated */
- DLM_REJECTED, /* 26: request rejected, unrecognized client */
- DLM_ABORT, /* 27: blocked lock request cancelled */
- DLM_CANCEL, /* 28: conversion request cancelled */
- DLM_IVRESHANDLE, /* 29: invalid resource handle */
- DLM_DEADLOCK, /* 30: deadlock recovery refused this request */
- DLM_DENIED_NOASTS, /* 31: failed to allocate AST */
- DLM_FORWARD, /* 32: request must wait for primary's response */
- DLM_TIMEOUT, /* 33: timeout value for lock has expired */
- DLM_IVGROUPID, /* 34: invalid group specification */
- DLM_VERS_CONFLICT, /* 35: version conflicts prevent request handling */
- DLM_BAD_DEVICE_PATH, /* 36: Locks device does not exist or path wrong */
- DLM_NO_DEVICE_PERMISSION, /* 37: Client has insufficient pers for device */
- DLM_NO_CONTROL_DEVICE, /* 38: Cannot set options on opened device */
- DLM_MAXSTATS, /* 39: upper limit for return code validation */
-
- DLM_RECOVERING /* 40: our lame addition to allow caller to fail a lock
- request if it is being recovered */
-} dlm_status;
-
-
-
-typedef struct _dlm_recovery_ctxt
-{
- struct list_head resources;
- struct list_head received; // list of dlm_reco_lock_infos received from other nodes during recovery
- u16 new_master;
- u16 dead_node;
- u16 sending_node;
- u32 next_seq;
- util_thread_info thread;
- unsigned long node_map[BITS_TO_LONGS(NM_MAX_NODES)];
-} dlm_recovery_ctxt;
-
-
-struct _dlm_ctxt
-{
- struct list_head list;
- struct list_head *resources;
- struct list_head dirty_list;
- spinlock_t spinlock;
- struct rw_semaphore recovery_sem;
- char *name;
- util_thread_info thread;
- struct inode *group;
- u32 key;
- u16 group_index;
- unsigned long node_map[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long recovery_map[BITS_TO_LONGS(NM_MAX_NODES)];
- dlm_recovery_ctxt reco;
-};
-
-#define DLM_LOCK_RES_UNINITED 0x00000001
-#define DLM_LOCK_RES_RECOVERING 0x00000002
-#define DLM_LOCK_RES_READY 0x00000004
-#define DLM_LOCK_RES_DIRTY 0x00000008
-#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
-
-typedef struct _dlm_lock_resource
-{
- struct list_head list;
-
- /* please keep these next 3 in this order
- * some funcs want to iterate over all lists */
- struct list_head granted;
- struct list_head converting;
- struct list_head blocked;
-
- struct list_head dirty;
- struct list_head recovering; // dlm_recovery_ctxt.resources list
- spinlock_t spinlock;
- wait_queue_head_t wq;
- u16 owner; // node which owns the lock resource, or unknown
- u16 state;
- struct qstr lockname;
- char lvb[DLM_LVB_LEN];
-} dlm_lock_resource;
-
-typedef void (dlm_astlockfunc_t)(void *);
-typedef void (dlm_bastlockfunc_t)(void *, int);
-typedef void (dlm_astunlockfunc_t)(void *, dlm_status);
-
-typedef struct _dlm_lockstatus dlm_lockstatus;
-
-typedef struct _dlm_lock
-{
- struct list_head list;
- struct list_head ast_list;
- dlm_lock_resource *lockres;
- spinlock_t spinlock;
-
- s8 type;
- s8 convert_type;
- s8 highest_blocked;
- s8 reserved1;
- u16 node;
- u16 reserved2;
-
- dlm_astlockfunc_t *ast; // ast and bast must be callable while holding a spinlock!
- dlm_bastlockfunc_t *bast;
- void *astdata;
- u64 cookie;
- dlm_lockstatus *lksb;
-} dlm_lock;
-
-
-#define DLM_LKSB_KERNEL_ALLOCATED 0x01 // allocated on master node on behalf of remote node
-#define DLM_LKSB_PUT_LVB 0x02
-#define DLM_LKSB_GET_LVB 0x04
-#define DLM_LKSB_UNUSED2 0x08
-#define DLM_LKSB_UNUSED3 0x10
-#define DLM_LKSB_UNUSED4 0x20
-#define DLM_LKSB_UNUSED5 0x40
-#define DLM_LKSB_UNUSED6 0x80
-
-struct _dlm_lockstatus {
- dlm_status status; // can we just change this to a u8 or u16?
- u32 flags;
- dlm_lock *lockid;
- char lvb[DLM_LVB_LEN];
-};
-
-enum {
- DLM_MLE_BLOCK,
- DLM_MLE_MASTER
-};
-
-typedef struct _dlm_lock_name
-{
- u8 len;
- u8 name[0]; // [DLM_LOCKID_NAME_MAX]
-} dlm_lock_name;
-
-/* good god this needs to be trimmed down */
-typedef struct _dlm_master_list_entry
-{
- struct list_head list;
- dlm_ctxt *dlm;
- spinlock_t spinlock;
- wait_queue_head_t wq;
- atomic_t woken;
- atomic_t refcnt;
- unsigned long maybe_map[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long vote_map[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long response_map[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long node_map[BITS_TO_LONGS(NM_MAX_NODES)];
- u16 master;
- u8 error;
- u8 type; // BLOCK or MASTER
- union {
- dlm_lock_resource *res;
- dlm_lock_name name;
- } u;
-} dlm_master_list_entry;
-
-
-
-#define DLM_MASTER_REQUEST_MSG 500
-#define DLM_MASTER_REQUEST_RESP_MSG 501
-#define DLM_ASSERT_MASTER_MSG 502
-#define DLM_CREATE_LOCK_MSG 503
-#define DLM_CONVERT_LOCK_MSG 504
-#define DLM_PROXY_AST_MSG 505
-#define DLM_UNLOCK_LOCK_MSG 506
-
-#define DLM_RECO_NODE_DATA_MSG 507
-
-
-typedef struct _dlm_reco_node_data
-{
- int state;
- u16 node_num;
- struct list_head list;
- struct list_head granted;
- struct list_head converting;
- struct list_head blocked;
-} dlm_reco_node_data;
-
-enum {
- DLM_RECO_NODE_DATA_DEAD = -1,
- DLM_RECO_NODE_DATA_INIT = 0,
- DLM_RECO_NODE_DATA_REQUESTING,
- DLM_RECO_NODE_DATA_REQUESTED,
- DLM_RECO_NODE_DATA_RECEIVING,
- DLM_RECO_NODE_DATA_DONE,
- DLM_RECO_NODE_DATA_FINALIZE_SENT,
-};
-
-
-enum {
- DLM_MASTER_RESP_NO,
- DLM_MASTER_RESP_YES,
- DLM_MASTER_RESP_MAYBE,
- DLM_MASTER_RESP_ERROR
-};
-
-
-typedef struct _dlm_master_request
-{
- u16 node_idx;
- u8 namelen;
- u8 pad1;
- u8 name[NM_MAX_NAME_LEN];
-} dlm_master_request;
-
-typedef struct _dlm_master_request_resp
-{
- u16 node_idx;
- u8 response;
- u8 namelen;
- u8 name[NM_MAX_NAME_LEN];
-} dlm_master_request_resp;
-
-typedef struct _dlm_assert_master
-{
- u16 node_idx;
- u8 namelen;
- u8 pad1;
- u8 name[NM_MAX_NAME_LEN];
-} dlm_assert_master;
-
-typedef struct _dlm_create_lock
-{
- u64 cookie;
- u32 flags;
- u16 node_idx;
- s8 requested_type;
- u8 namelen;
- u8 name[NM_MAX_NAME_LEN];
-} dlm_create_lock;
-
-typedef struct _dlm_convert_lock
-{
- u64 cookie;
- u32 flags;
- u16 node_idx;
- s8 requested_type;
- u8 namelen;
- u8 name[NM_MAX_NAME_LEN];
- s8 lvb[0];
-} dlm_convert_lock;
-#define DLM_CONVERT_LOCK_MAX_LEN (sizeof(dlm_convert_lock) + DLM_LVB_LEN)
-
-typedef struct _dlm_unlock_lock
-{
- u64 cookie;
- u32 flags;
- u16 node_idx;
- u8 namelen;
- u8 pad1;
- u8 name[NM_MAX_NAME_LEN];
- s8 lvb[0];
-} dlm_unlock_lock;
-#define DLM_UNLOCK_LOCK_MAX_LEN (sizeof(dlm_unlock_lock) + DLM_LVB_LEN)
-
-typedef struct _dlm_proxy_ast
-{
- u64 cookie;
- u32 flags;
- u16 node_idx;
- u16 pad1;
- u8 type;
- u8 blocked_type;
- u8 namelen;
- u8 pad2;
- u8 name[NM_MAX_NAME_LEN];
- s8 lvb[0];
-} dlm_proxy_ast;
-#define DLM_PROXY_AST_MAX_LEN (sizeof(dlm_proxy_ast) + DLM_LVB_LEN)
-
-
-static inline void dlm_master_request_to_net(dlm_master_request *m)
-{
- m->node_idx = htons(m->node_idx);
-}
-static inline void dlm_master_request_to_host(dlm_master_request *m)
-{
- m->node_idx = ntohs(m->node_idx);
-}
-
-static inline void dlm_master_request_resp_to_net(dlm_master_request_resp *m)
-{
- m->node_idx = htons(m->node_idx);
-}
-static inline void dlm_master_request_resp_to_host(dlm_master_request_resp *m)
-{
- m->node_idx = ntohs(m->node_idx);
-}
-
-static inline void dlm_assert_master_to_net(dlm_assert_master *m)
-{
- m->node_idx = htons(m->node_idx);
-}
-static inline void dlm_assert_master_to_host(dlm_assert_master *m)
-{
- m->node_idx = ntohs(m->node_idx);
-}
-
-static inline void dlm_create_lock_to_net(dlm_create_lock *c)
-{
- c->cookie = cpu_to_be64(c->cookie);
- c->flags = htonl(c->flags);
- c->node_idx = htons(c->node_idx);
-}
-static inline void dlm_create_lock_to_host(dlm_create_lock *c)
-{
- c->cookie = be64_to_cpu(c->cookie);
- c->flags = ntohl(c->flags);
- c->node_idx = ntohs(c->node_idx);
-}
-
-static inline void dlm_convert_lock_to_net(dlm_convert_lock *c)
-{
- c->cookie = cpu_to_be64(c->cookie);
- c->flags = htonl(c->flags);
- c->node_idx = htons(c->node_idx);
-}
-static inline void dlm_convert_lock_to_host(dlm_convert_lock *c)
-{
- c->cookie = be64_to_cpu(c->cookie);
- c->flags = ntohl(c->flags);
- c->node_idx = ntohs(c->node_idx);
-}
-
-static inline void dlm_unlock_lock_to_net(dlm_unlock_lock *u)
-{
- u->cookie = cpu_to_be64(u->cookie);
- u->flags = htonl(u->flags);
- u->node_idx = htons(u->node_idx);
-}
-static inline void dlm_unlock_lock_to_host(dlm_unlock_lock *u)
-{
- u->cookie = be64_to_cpu(u->cookie);
- u->flags = ntohl(u->flags);
- u->node_idx = ntohs(u->node_idx);
-}
-
-static inline void dlm_proxy_ast_to_net(dlm_proxy_ast *a)
-{
- a->cookie = cpu_to_be64(a->cookie);
- a->flags = htonl(a->flags);
- a->node_idx = htons(a->node_idx);
-}
-static inline void dlm_proxy_ast_to_host(dlm_proxy_ast *a)
-{
- a->cookie = be64_to_cpu(a->cookie);
- a->flags = ntohl(a->flags);
- a->node_idx = ntohs(a->node_idx);
-}
-
-
-int dlm_create_lock_handler(net_msg *msg, u32 len, void *data);
-int dlm_convert_lock_handler(net_msg *msg, u32 len, void *data);
-int dlm_proxy_ast_handler(net_msg *msg, u32 len, void *data);
-
-int dlm_unlock_lock_handler(net_msg *msg, u32 len, void *data);
-
-
-
-
-
-void dlm_shuffle_lists(dlm_ctxt *dlm, dlm_lock_resource *res);
-void dlm_thread_run_lock_resources(dlm_ctxt *dlm);
-int dlm_launch_thread(dlm_ctxt *dlm);
-void dlm_complete_thread(dlm_ctxt *dlm);
-
-dlm_status dlmlock(dlm_ctxt *dlm, int mode, dlm_lockstatus *lksb, int flags, char *name,
- dlm_astlockfunc_t *ast, void *data, dlm_bastlockfunc_t *bast);
-
-dlm_status dlmlock_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags);
-dlm_status dlmlock_remote(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags);
-
-dlm_status dlmconvert_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type);
-dlm_status dlmconvert_remote(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, int flags, int type);
-
-dlm_status dlmunlock(dlm_ctxt *dlm, dlm_lockstatus *lksb, int flags,
- dlm_astunlockfunc_t *unlockast, void *data);
-dlm_status dlmunlock_common(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int flags, int *call_ast, int master_node);
-static inline dlm_status dlmunlock_master(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int flags, int *call_ast)
-{
- return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1);
-}
-
-static inline dlm_status dlmunlock_remote(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int flags, int *call_ast)
-{
- return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0);
-}
-
-
-dlm_ctxt * dlm_register_domain(char *domain, char *group_name, u32 key);
-void dlm_unregister_domain(dlm_ctxt *dlm);
-dlm_lock_resource * dlm_get_lock_resource(dlm_ctxt *dlm, struct qstr *lockname, int flags);
-int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
-int dlm_refresh_lock_resource(dlm_ctxt *dlm, dlm_lock_resource *res);
-int dlm_do_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock);
-int dlm_do_bast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int blocked_type);
-u16 dlm_nm_this_node(dlm_ctxt *dlm);
-void dlm_kick_thread(dlm_ctxt *dlm, dlm_lock_resource *res);
-
-int dlm_nm_init(dlm_ctxt *dlm);
-int dlm_heartbeat_init(dlm_ctxt *dlm);
-
-dlm_lock_resource * dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
-dlm_ctxt * dlm_lookup_domain(char *domain);
-
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data);
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data);
-int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
-int dlm_hb_node_up(dlm_ctxt *dlm, int node);
-int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
-int __dlm_hb_node_up(dlm_ctxt *dlm, int node);
-
-int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
-int dlm_master_request_handler(net_msg *msg, u32 len, void *data);
-int dlm_master_request_resp_handler(net_msg *msg, u32 len, void *data);
-int dlm_assert_master_handler(net_msg *msg, u32 len, void *data);
-dlm_lock_resource * __dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
-void dlm_init_lockres(dlm_lock_resource *res, struct qstr *lockname);
-void dlm_wait_on_lockres(dlm_lock_resource *res);
-void dlm_dump_everything(void);
-void dlm_dump_dlm(dlm_ctxt *dlm);
-
-int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
-int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
-
-void dlm_wait_on_lockres(dlm_lock_resource *res);
-void __dlm_wait_on_lockres(dlm_lock_resource *res);
-
-
-
-
-static inline const char * dlm_lock_mode_name(int mode)
-{
- switch (mode) {
- case LKM_EXMODE:
- return "EX";
- case LKM_PRMODE:
- return "PR";
- case LKM_NLMODE:
- return "NL";
- }
- return "UNKNOWN";
-}
-
-
-static inline int dlm_lock_compatible(int existing, int request)
-{
- /* NO_LOCK compatible with all */
- if (request == LKM_NLMODE ||
- existing == LKM_NLMODE)
- return 1;
-
- /* EX incompatible with all non-NO_LOCK */
- if (request == LKM_EXMODE)
- return 0;
-
- /* request must be PR, which is compatible with PR */
- if (existing == LKM_PRMODE)
- return 1;
-
- return 0;
-}
-
-static inline int dlm_lock_on_list(struct list_head *head, dlm_lock *lock)
-{
- struct list_head *iter;
- dlm_lock *tmplock;
-
- list_for_each(iter, head) {
- tmplock = list_entry(iter, dlm_lock, list);
- if (tmplock == lock)
- return 1;
- }
- return 0;
-}
-
-static inline int dlm_mle_equal(dlm_ctxt *dlm, dlm_master_list_entry *mle, struct qstr *lockname)
-{
- dlm_lock_resource *res;
-
- if (dlm != mle->dlm)
- return 0;
-
- if (mle->type == DLM_MLE_BLOCK) {
- if (lockname->len != mle->u.name.len ||
- strncmp(lockname->name, mle->u.name.name, lockname->len)!=0)
- return 0;
- } else {
- res = mle->u.res;
- if (res->lockname.hash != lockname->hash ||
- res->lockname.len != lockname->len ||
- strncmp(res->lockname.name, lockname->name, lockname->len)!=0)
- return 0;
- }
- return 1;
-}
-
-static inline dlm_status dlm_err_to_dlm_status(int err)
-{
- dlm_status ret;
- if (err == -ENOMEM)
- ret = DLM_SYSERR;
- else if (err == -ETIMEDOUT || net_link_down(err, NULL))
- ret = DLM_NOLOCKMGR;
- else if (err == -EINVAL)
- ret = DLM_BADPARAM;
- else if (err == -ENAMETOOLONG)
- ret = DLM_IVBUFLEN;
- else
- ret = DLM_BADARGS;
- return ret;
-}
-
-#endif /* CLUSTER_DLMMOD_H */
Copied: trunk/fs/ocfs2/cluster/dlmmod.h (from rev 1811, trunk/cluster/dlmmod.h)
Deleted: trunk/fs/ocfs2/cluster/dlmrecovery.c
===================================================================
--- trunk/cluster/dlmrecovery.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmrecovery.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,947 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmrecovery.c
- *
- * recovery stuff
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-static void dlm_do_local_recovery_cleanup(dlm_ctxt *dlm, u16 dead_node, int locked);
-
-int dlm_recovery_thread(void *data);
-void dlm_complete_recovery_thread(dlm_ctxt *dlm);
-int dlm_launch_recovery_thread(dlm_ctxt *dlm);
-void dlm_kick_recovery_thread(dlm_ctxt *dlm);
-
-u16 dlm_pick_recovery_master(dlm_ctxt *dlm, u16 *new_dead_node);
-static int dlm_remaster_locks_local(dlm_ctxt *dlm);
-int dlm_init_recovery_area(dlm_ctxt *dlm);
-int dlm_request_all_locks(dlm_ctxt *dlm, u16 request_from, u16 dead_node);
-void dlm_destroy_recovery_area(dlm_ctxt *dlm, u16 dead_node);
-
-#define DLM_RECOVERY_THREAD_MS 2000
-
-
-#ifdef LOUSY_RECOVERY
-
-/*
- * RECOVERY THREAD
- */
-
-void dlm_kick_recovery_thread(dlm_ctxt *dlm)
-{
- /* wake the recovery thread
- * this will wake the reco thread in one of three places
- * 1) sleeping with no recovery happening
- * 2) sleeping with recovery mastered elsewhere
- * 3) recovery mastered here, waiting on reco data */
- atomic_set(&dlm->reco.thread.woken, 1);
- wake_up(&dlm->reco.thread.thread_wq);
-}
-
-/* Launch the recovery thread */
-int dlm_launch_recovery_thread(dlm_ctxt *dlm)
-{
- dlmprintk0("starting recovery thread...\n");
- dlm->reco.thread.pid = kernel_thread (dlm_recovery_thread, dlm, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (dlm->reco.thread.pid < 0) {
- dlmprintk("unable to launch recovery thread, error=%d", dlm->reco.thread.pid);
- return -EINVAL;
- }
- dlmprintk0("recovery thread running...\n");
- return 0;
-}
-
-void dlm_complete_recovery_thread(dlm_ctxt *dlm)
-{
- dlmprintk0 ("waiting for recovery thread to exit....");
- send_sig (SIGINT, dlm->reco.thread.task, 0);
- wait_for_completion (&dlm->reco.thread.complete);
- dlmprintk0 ("recovery thread exited\n");
- dlm->reco.thread.task = NULL;
-}
-
- /*
- * this is lame, but here's how recovery works...
- * 1) all recovery threads cluster wide will work on recovering
- * ONE node at a time
- * 2) negotiate who will take over all the locks for the dead node.
- * thats right... ALL the locks.
- * 3) once a new master is chosen, everyone scans all locks
- * and moves aside those mastered by the dead guy
- * 4) each of these locks should be locked until recovery is done
- * 5) the new master collects up all of secondary lock queue info
- * one lock at a time, forcing each node to communicate back
- * before continuing
- * 6) each secondary lock queue responds with the full known lock info
- * 7) once the new master has run all its locks, it sends a ALLDONE!
- * message to everyone
- * 8) upon receiving this message, the secondary queue node unlocks
- * and responds to the ALLDONE
- * 9) once the new master gets responses from everyone, he unlocks
- * everything and recovery for this dead node is done
- *10) go back to 2) while there are still dead nodes
- *
- */
-
-
-
-int dlm_recovery_thread(void *data)
-{
- int status, i;
- int cnt = 0, dlm_num;
- struct list_head *iter, *iter2, *tmpiter;
- dlm_lock_resource *res;
- char name[12];
- dlm_ctxt *dlm = data;
- u16 tmp;
-
-
- dlm_num = nm_get_group_global_index(dlm->group);
- sprintf(name, "dlmreco-%03u", dlm_num);
- util_daemonize (name, strlen(name), 1);
- dlm->reco.thread.task = current;
-
- while (1) {
- spin_lock(&dlm->spinlock);
-
- /* check to see if the new master has died */
- if (dlm->reco.new_master != NM_INVALID_SLOT_NUM &&
- test_bit(dlm->reco.new_master, dlm->recovery_map)) {
- dlmprintk("new master %u died while recovering %u!\n",
- dlm->reco.new_master, dlm->reco.dead_node);
- // unset the new_master, leave dead_node
- dlm->reco.new_master = NM_INVALID_SLOT_NUM;
- }
-
- /* select a target to recover */
- if (dlm->reco.dead_node == NM_INVALID_SLOT_NUM) {
- dlm->reco.dead_node = find_next_bit (dlm->recovery_map, NM_MAX_NODES, 0);
- if (dlm->reco.dead_node >= NM_MAX_NODES)
- dlm->reco.dead_node = NM_INVALID_SLOT_NUM;
- } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
- // BUG?
- dlmprintk("dead_node %u no longer in recovery map!\n",
- dlm->reco.dead_node);
- dlm->reco.dead_node = NM_INVALID_SLOT_NUM;
- }
-
- if (dlm->reco.dead_node == NM_INVALID_SLOT_NUM) {
- dlmprintk0("nothing to recover! sleeping now!\n");
- spin_unlock(&dlm->spinlock);
- goto sleep;
- }
- spin_unlock(&dlm->spinlock);
-
- /* take write barrier */
- /* (stops the list reshuffling thread, proxy ast handling) */
- down_write(&dlm->recovery_sem);
-
- /* choose a new master */
- if (dlm->reco.new_master == NM_INVALID_SLOT_NUM) {
- u16 new_dead_node = dlm->reco.dead_node;
- dlm->reco.new_master = dlm_pick_recovery_master(dlm, &new_dead_node);
- if (new_dead_node != dlm->reco.dead_node) {
- // master wants to recover a different node
- dlm->reco.dead_node = new_dead_node;
-
- // do local cleanup if heartbeat has not added the
- // node to the recovery map yet
- spin_lock(&dlm->spinlock);
- if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
- dlm_do_local_recovery_cleanup(dlm, dlm->reco.dead_node, 1);
- set_bit(dlm->reco.dead_node, dlm->recovery_map);
- clear_bit(dlm->reco.dead_node, dlm->node_map);
- }
- spin_unlock(&dlm->spinlock);
- }
- }
-
- dlmprintk("RECOVERY! new_master=%u, this node=%u, dead_node=%u\n",
- dlm->reco.new_master, dlm->group_index, dlm->reco.dead_node);
-
- if (dlm->reco.new_master != dlm->group_index) {
- /* it is safe to start everything back up here
- * because all of the dead node's lock resources
- * have been marked as in-recovery */
- up_write(&dlm->recovery_sem);
-
- // sit around until new_master is dead or done
- // we will get signalled by the waitqueue either way
- dlmprintk("new_master %u is recovering dead_node %u... waiting...\n",
- dlm->reco.new_master, dlm->reco.dead_node);
-sleep:
- atomic_set(&dlm->reco.thread.woken, 0);
- status = util_wait_atomic_eq(&dlm->reco.thread.thread_wq,
- &dlm->reco.thread.woken,
- 1, DLM_RECOVERY_THREAD_MS);
- if (status == 0 || status == -ETIMEDOUT) {
- if (atomic_read(&dlm->reco.thread.woken))
- dlmprintk0("aha!!! recovery thread woken!\n");
- else
- dlmprintk0("timed out waiting, running again\n");
- continue;
- }
- dlmprintk("recovery thread got %d while waiting\n", status);
- break;
- }
-
- /* new_master == dlm->group_index */
- status = dlm_remaster_locks_local(dlm);
- if (status < 0) {
- dlmprintk("error remastering locks for node %u!!!! retrying!\n",
- dlm->reco.dead_node);
- } else {
- // success! see if any other nodes need recovery
- spin_lock(&dlm->spinlock);
- clear_bit(dlm->reco.dead_node, dlm->recovery_map);
- spin_unlock(&dlm->spinlock);
- dlm->reco.dead_node = NM_INVALID_SLOT_NUM;
- dlm->reco.new_master = NM_INVALID_SLOT_NUM;
- dlm->reco.sending_node = NM_INVALID_SLOT_NUM;
- dlm->reco.next_seq = 0;
- }
- up_write(&dlm->recovery_sem);
- // continue and look for another dead node
- }
-
- flush_scheduled_work();
- complete (&dlm->reco.thread.complete);
- dlmprintk0("quitting recovery thread!!!!!!\n");
- return 0;
-}
-
-/* +- if this node is NOT the new master... */
-/* +--- if master's dead_node is not the one we chose, do local cleanup again with proper dead_node */
-/* +--- wait for poll messages from new master: register net message handler, it will do the work */
-/* +--- check for death of new master */
-/* +--- if dead, unregister the handler, unset new_master, keep dead_node and goto "select a target" */
-/* |- on request, send header with number of packets, get response, then start blasting packets */
-/* |- retransmit any missed packets on request */
-/* |- once ALL DONE is received, run all locks again */
-/* +--- unset the RECOVERING flag */
-/* +--- set the new owner as new_master */
-/* +--- remove dead_node from recovery map */
-/* +--- unset new_master and dead_node and start all over */
-
-static spinlock_t dlm_reco_state_lock = SPIN_LOCK_UNLOCKED;
-
-
-static int dlm_remaster_locks_local(dlm_ctxt *dlm)
-{
- int num_nodes = NM_MAX_NODES, next=0, status = 0;
- dlm_reco_node_data *ndata;
- struct list_head *iter;
- int all_nodes_done;
-
-
-/* +- if this node is the new master, init the temp recovery area */
-/* |- poll each live node for lock state */
-/* |- collect the data from each node until node says it's done, or dead */
-/* +--- if node died, throw away temp recovery area, keep new_master and dead_node, goto "select a target" */
-/* |- apply all temp area changes to real lock */
-/* +- send ALL DONE message to each node */
-
- status = dlm_init_recovery_area(dlm);
- if (status < 0)
- return status;
-
- spin_lock(&dlm_reco_state_lock);
- list_for_each(iter, &dlm->reco.node_data) {
- ndata = list_entry (iter, dlm_reco_node_data, list);
- DLM_ASSERT(ndata->state == DLM_RECO_NODE_DATA_INIT);
- ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
-
- status = dlm_request_all_locks(dlm, ndata->node_num, dlm->reco.dead_node);
- if (status < 0) {
- dlm_destroy_recovery_area(dlm, dlm->reco.dead_node);
- return status;
- }
-
- switch (ndata->state) {
- case DLM_RECO_NODE_DATA_INIT:
- case DLM_RECO_NODE_DATA_FINALIZE_SENT:
- case DLM_RECO_NODE_DATA_REQUESTED:
- DLM_ASSERT(0);
- break;
- case DLM_RECO_NODE_DATA_DEAD:
- dlmprintk("eek. node %u died after requesting recovery info for node %u\n",
- ndata->node_num, dlm->reco.dead_node);
- spin_unlock(&dlm_reco_state_lock);
- // start all over
- dlm_destroy_recovery_area(dlm, dlm->reco.dead_node);
- return -EAGAIN;
- case DLM_RECO_NODE_DATA_REQUESTING:
- ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
- dlmprintk("now receiving recovery data from node %u for dead node %u\n",
- ndata->node_num, dlm->reco.dead_node);
- break;
- case DLM_RECO_NODE_DATA_RECEIVING:
- dlmprintk("already receiving recovery data from node %u for dead node %u\n",
- ndata->node_num, dlm->reco.dead_node);
- break;
- case DLM_RECO_NODE_DATA_DONE:
- dlmprintk("already DONE receiving recovery data from node %u for dead node %u\n",
- ndata->node_num, dlm->reco.dead_node);
- break;
- }
- }
- spin_unlock(&dlm_reco_state_lock);
-
- /* nodes should be sending reco data now
- * just need to wait */
-
- while (1) {
- /* wait to be signalled, with periodic timeout
- * to check for node death */
- atomic_set(&dlm->reco.thread.woken, 0);
- ret = util_wait_atomic_eq(&dlm->reco.thread.thread_wq,
- &dlm->reco.thread.woken, 1,
- DLM_RECOVERY_THREAD_MS);
- if (ret == 0 || ret == -ETIMEDOUT) {
- if (atomic_read(&dlm->reco.thread.woken))
- dlmprintk0("waiting on reco data... aha!!! recovery thread woken!\n");
- else
- dlmprintk0("waiting on reco data... timed out waiting\n");
- }
-
- /* either way, recheck all the nodes now to see if we are
- * done, or if anyone died */
- all_nodes_done = 1;
- spin_lock(&dlm_reco_state_lock);
- list_for_each(iter, &dlm->reco.node_data) {
- ndata = list_entry (iter, dlm_reco_node_data, list);
-
- switch (ndata->state) {
- case DLM_RECO_NODE_DATA_INIT:
- case DLM_RECO_NODE_DATA_REQUESTING:
- DLM_ASSERT(0);
- break;
- case DLM_RECO_NODE_DATA_DEAD:
- dlmprintk("eek. node %u died after requesting recovery info for node %u\n",
- ndata->node_num, dlm->reco.dead_node);
- spin_unlock(&dlm_reco_state_lock);
- // start all over
- dlm_destroy_recovery_area(dlm, dlm->reco.dead_node);
- return -EAGAIN;
- case DLM_RECO_NODE_DATA_RECEIVING:
- case DLM_RECO_NODE_DATA_REQUESTED:
- all_nodes_done = 0;
- break;
- case DLM_RECO_NODE_DATA_DONE:
- break;
- case DLM_RECO_NODE_DATA_FINALIZE_SENT:
- break;
- }
- }
- spin_unlock(&dlm_reco_state_lock);
-
- if (all_nodes_done) {
- /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
- * just send a finalize message to everyone and
- * clean up */
- ret = dlm_finalize_recovery(dlm);
- if (ret < 0) {
- dlmprintk("dlm_finalize_recovery returned %d\n", ret);
- }
- dlm_destroy_recovery_area(dlm, dlm->reco.dead_node);
- status = ret;
- break;
- }
- }
-
- return status;
-}
-
-int dlm_init_recovery_area(dlm_ctxt *dlm)
-{
- int num=0, ret;
- dlm_reco_node_data *ndata;
- LIST_HEAD(tmplist);
-
- spin_lock(&dlm->spinlock);
- memcpy(dlm->reco.node_map, dlm->node_map, sizeof(dlm->node_map));
- /* nodes can only be removed (by dying) after dropping
- * this lock, and death will be trapped later, so this should do */
- spin_unlock(&dlm->spinlock);
-
- while (1) {
- num = find_next_bit (dlm->reco.node_map, NM_MAX_NODES, num);
- if (num >= NM_MAX_NODES) {
- break;
- }
- DLM_ASSERT(num != dead_node);
-
- ndata = kmalloc(sizeof(dlm_reco_node_data), GFP_KERNEL);
- if (!ndata) {
- dlm_destroy_recovery_area(dlm, dead_node);
- return -ENOMEM;
- }
- memset(ndata, 0, sizeof(dlm_reco_node_data));
- ndata->node_num = num;
- ndata->state = DLM_RECO_NODE_DATA_INIT;
- LIST_HEAD_INIT(&ndata->granted);
- LIST_HEAD_INIT(&ndata->converting);
- LIST_HEAD_INIT(&ndata->blocked);
- spin_lock(&dlm_reco_state_lock);
- list_add_tail(&ndata->list, &dlm->reco.node_data);
- spin_unlock(&dlm_reco_state_lock);
- num++;
- }
-
- return 0;
-}
-
-void dlm_destroy_recovery_area(dlm_ctxt *dlm, u16 dead_node)
-{
- struct list_head *iter, *iter2;
- dlm_reco_node_data *ndata;
- LIST_HEAD(tmplist);
-
- spin_lock(&dlm_reco_state_lock);
- list_splice_init(&dlm->reco.node_data, &tmplist);
- spin_unlock(&dlm_reco_state_lock);
-
-#warning this probably needs to be smarter
- list_for_each_safe(iter, iter2, &tmplist) {
- ndata = list_entry (iter, dlm_reco_node_data, list);
- kfree(ndata);
- }
-}
-
-int dlm_request_all_locks(dlm_ctxt *dlm, u16 request_from, u16 dead_node)
-{
- dlmprintk("dlm_request_all_locks: dead node is %u, sending request to %u\n",
- dead_node, request_from);
- // send message
- // sleep until all received or error
- return 0;
-}
-
-typedef struct _dlm_reco_request_locks
-{
- u16 dead_node;
-} dlm_reco_request_locks;
-
-typedef struct _dlm_reco_node_data
-{
-} dlm_reco_node_data;
-
-int dlm_request_all_locks_handler(net_msg *msg, u32 len, void *data)
-{
-#if 0
- int status;
- dlm_ctxt *dlm = data;
- dlm_lock_resource *res;
- dlm_lock *lock = NULL;
- dlm_proxy_ast *past = (dlm_proxy_ast *) msg->buf;
- struct qstr lockname = { .name=past->name, .len=past->namelen };
- struct list_head *iter, *head=NULL;
- u64 cookie = past->cookie;
- u32 flags = past->flags;
-
- if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
- (LKM_PUT_LVB|LKM_GET_LVB)) {
- dlmprintk("both PUT and GET lvb specified\n");
- return DLM_BADARGS;
- }
-
- dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
- (flags & LKM_GET_LVB ? "get lvb" : "none"));
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- dlmprintk("type=%d, blocked_type=%d\n", past->type, past->blocked_type);
-
- if (past->type != DLM_AST &&
- past->type != DLM_BAST) {
- dlmprintk("Eeeek unknown ast type! %d, cookie=%llu, name=%*s\n",
- past->type, cookie, lockname.len, lockname.name);
- return 0;
- }
-
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res) {
- dlmprintk("eek! got %sast for unknown lockres! cookie=%llu, name=%*s, namelen=%d\n",
- past->type == DLM_AST ? "" : "b", cookie, lockname.len, lockname.name, lockname.len);
- return 0;
- }
-#endif
-
-}
-
-int dlm_reco_node_data_handler(net_msg *msg, u32 len, void *data)
-{
-#if 0
- int status;
- dlm_ctxt *dlm = data;
- dlm_lock_resource *res;
- dlm_lock *lock = NULL;
- dlm_proxy_ast *past = (dlm_proxy_ast *) msg->buf;
- struct qstr lockname = { .name=past->name, .len=past->namelen };
- struct list_head *iter, *head=NULL;
- u64 cookie = past->cookie;
- u32 flags = past->flags;
-
- if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
- (LKM_PUT_LVB|LKM_GET_LVB)) {
- dlmprintk("both PUT and GET lvb specified\n");
- return DLM_BADARGS;
- }
-
- dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
- (flags & LKM_GET_LVB ? "get lvb" : "none"));
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- dlmprintk("type=%d, blocked_type=%d\n", past->type, past->blocked_type);
-
- if (past->type != DLM_AST &&
- past->type != DLM_BAST) {
- dlmprintk("Eeeek unknown ast type! %d, cookie=%llu, name=%*s\n",
- past->type, cookie, lockname.len, lockname.name);
- return 0;
- }
-
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res) {
- dlmprintk("eek! got %sast for unknown lockres! cookie=%llu, name=%*s, namelen=%d\n",
- past->type == DLM_AST ? "" : "b", cookie, lockname.len, lockname.name, lockname.len);
- return 0;
- }
-#endif
-}
-
-
-
-int dlm_recovery_request_handler(net_msg *msg, u32 len, void *data);
-int dlm_recovery_response_handler(net_msg *msg, u32 len, void *data);
-int dlm_recovery_lock_arr_req_handler(net_msg *msg, u32 len, void *data);
-
-typedef struct _dlm_reco_lock_info
-{
- u16 node;
- u16 unused1;
- u64 cookie;
- s8 type;
- s8 convert_type;
- u8 list;
- u8 lockname_len;
- u8 lockname[DLM_LOCKID_NAME_MAX];
-} dlm_reco_lock_info;
-
-enum {
- DLM_RECO_MASTER_REQUEST,
- DLM_RECO_XMIT_LOCKS_REQUEST,
- DLM_RECO_XMIT_LOCK_HDR_REQUEST,
- DLM_RECO_XMIT_LOCK_ARR_REQUEST,
- DLM_RECO_XMIT_COMPLETE_REQUEST,
- DLM_RECO_ALL_DONE_REQUEST
-};
-
-enum {
- DLM_RECO_NO_RESPONSE,
- DLM_RECO_YES_RESPONSE
-};
-
-#define DLM_LOCKS_PER_PACKET 40
-
-typedef struct _dlm_reco_lock_arr_req
-{
- u8 request_type;
- u8 num_locks;
- u16 dead_node;
- u32 seqnum;
- dlm_reco_lock_info lock[DLM_LOCKS_PER_PACKET];
-} dlm_reco_lock_arr_req;
-
-typedef struct _dlm_reco_request
-{
- u8 request_type;
- u8 unused1;
- u16 dead_node;
- u32 num;
-} dlm_reco_request;
-
-typedef struct _dlm_reco_response
-{
- u8 response_type;
- u8 unused1[7];
-} dlm_reco_response;
-
-static inline int dlm_reco_lock_info_valid(dlm_reco_lock_info *info)
-{
- if (info->type != LKM_NLMODE &&
- info->type != LKM_PRMODE &&
- info->type != LKM_EXMODE)
- return 0;
- if (info->convert_type != LKM_NLMODE &&
- info->convert_type != LKM_PRMODE &&
- info->convert_type != LKM_EXMODE)
- return 0;
- if (info->list > 2)
- return 0;
- return 1;
-}
-
-static inline int dlm_check_reco_lock_arr_msg(net_msg *msg, dlm_ctxt *dlm, int *out_of_order);
-
-static inline int dlm_check_reco_lock_arr_msg(net_msg *msg, dlm_ctxt *dlm, int *out_of_order)
-{
- int ret = -EINVAL;
- dlm_reco_lock_arr_req *req = (dlm_reco_lock_arr_req *)msg->buf;
-
- /* check a bunch of ugly conditions */
- *out_of_order = 0;
- if (req->num_locks > DLM_LOCKS_PER_PACKET) {
- dlmprintk("num_locks too large! %u\n", req->num_locks);
- } else if (req->seqnum != dlm->reco.next_seq) {
- dlmprintk("expected seq %lu from node %u, got %lu\n",
- dlm->reco.next_seq, msg->src_node,
- req->seqnum);
- *out_of_order = 1;
- } else if (dlm->reco.dead_node != req->dead_node) {
- dlmprintk("bad lock array: dead node=%u, sent=%u\n",
- dlm->reco.dead_node != req->dead_node);
- } else if (dlm->reco.new_master != dlm->group_index) {
- dlmprintk0("this node is not the recovery master!\n");
- } else if (dlm->reco.sending_node != msg->src_node ||
- dlm->group_index == msg->dest_node) {
- dlmprintk0("eek. sending_node=%u, actual=%u, dest=%u, me=%u\n",
- dlm->reco.sending_node, msg->src_node,
- msg->dest_node, dlm->group_index);
- } else
- ret = 0;
- return ret;
-}
-
-
-int dlm_recovery_lock_arr_req_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_reco_lock_arr_req *req = (dlm_reco_lock_arr_req *)msg->buf;
- dlm_lock_resource *res = NULL;
- dlm_reco_lock_info *info;
- dlm_lock **newlocks = NULL;
- dlm_lock *lock = NULL;
- int ret, i, out_of_order = 0;
-
- ret = 0;
- if (req->num_locks == 0)
- goto send_response;
-
- /* check to see if it's worth kmallocing */
- spin_lock(&dlm->spinlock);
- ret = dlm_check_reco_lock_arr_msg(msg, dlm, &out_of_order);
- spin_unlock(&dlm->spinlock);
- if (ret < 0)
- goto send_response;
-
- newlocks = kmalloc(req->num_locks * sizeof(dlm_lock *), GFP_KERNEL);
- if (!newlocks) {
- dlmprintk0("failed to alloc temp lock array!\n");
- ret = -ENOMEM;
- goto send_response;
- }
- memset(newlocks, 0, req->num_locks * sizeof(dlm_lock *));
- for (i=0; i<req->num_locks; i++) {
- info = &(req->lock[i]);
- if (!dlm_reco_lock_info_valid(info)) {
- ret = -EINVAL;
- goto send_response;
- }
- lock = newlocks[i] = kmem_cache_alloc(dlm_lock_cache, GFP_KERNEL);
- if (!newlocks[i]) {
- ret = -ENOMEM;
- goto send_response;
- }
- memset(lock, 0, sizeof(dlm_lock));
- LIST_HEAD_INIT(&lock->list);
- LIST_HEAD_INIT(&lock->ast_list);
- spin_lock_init(&lock->spinlock);
- lock->type = info->type;
- lock->convert_type = info->convert_type;
- lock->node = dlm->group_index;
- //atomic_set(&lock->ast_lock, 0);
- //atomic_set(&lock->bast_lock, 0);
- lock->ast = NULL;
- lock->bast = NULL;
- lock->astdata = (void *)info->list; // cheating here...
- lock->cookie = info->cookie;
- }
-
- spin_lock(&dlm->spinlock);
- /* ok now that everything is allocated and the lock has
- * been taken again, recheck all those stupid conditions */
- ret = dlm_check_reco_lock_arr_msg(msg, dlm, &out_of_order);
- if (ret < 0) {
- spin_unlock(&dlm->spinlock);
- goto send_response;
- }
- for (i=0; i<req->num_locks; i++) {
- info = &(req->lock[i]);
- lock = newlocks[i];
- list_add_tail(&lock->list, &dlm->reco.received);
- }
- spin_unlock(&dlm->spinlock);
-
-send_response:
- if (newlocks) {
- if (ret < 0) {
- for (i=0; i<req->num_locks; i++)
- if (newlocks[i])
- kmem_cache_free(dlm_reco_lock_info_cache, newlocks[i]);
- }
- kfree(newlocks);
- }
-
- return ret;
-}
-int dlm_recovery_request_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
-}
-int dlm_recovery_response_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
-}
-
-
-
-
-
-static int dlm_send_reco_request(dlm_ctxt *dlm, dlm_reco_request *buf, u16 to, struct inode *node)
-{
- int ret;
- net_msg *msg = net_package_message(DLM_NET_RECOVERY_REQUEST_MSG_TYPE,
- dlm->key, buf, sizeof(*buf),
- dlm->group_index, to);
- if (!msg)
- return -ENOMEM;
- ret = net_send_udp_msg (node, msg, sizeof(*buf));
- kfree(msg);
- return ret;
-}
-
-static int dlm_recover_domain(dlm_ctxt *dlm)
-{
-
-
- return 0;
-}
-
-
-#endif /* LOUSY_RECOVERY */
-
-#warning may need to change kfree to put_lock and refcounting here
-static void dlm_do_local_recovery_cleanup(dlm_ctxt *dlm, u16 dead_node, int locked)
-{
- struct list_head *iter, *iter2, *tmpiter;
- dlm_lock_resource *res;
- dlm_lock *lock;
- int i;
- struct list_head *bucket;
-
- if (!locked)
- spin_lock(&dlm->spinlock);
-
- for (i=0; i<DLM_HASH_SIZE; i++) {
- bucket = &(dlm->resources[i]);
- list_for_each(iter, bucket) {
- res = list_entry (iter, dlm_lock_resource, list);
- spin_lock(&res->spinlock);
- if (res->owner == dead_node) {
- res->state |= DLM_LOCK_RES_RECOVERING;
- list_del(&res->recovering);
- list_add_tail(&res->recovering, &dlm->reco.resources);
- } else if (res->owner == dlm->group_index) {
- list_for_each_safe(iter2, tmpiter, &res->granted) {
- lock = list_entry (iter2, dlm_lock, list);
- if (lock->node == dead_node) {
- list_del(&lock->list);
- kfree(lock);
- }
- }
- list_for_each_safe(iter2, tmpiter, &res->converting) {
- lock = list_entry (iter2, dlm_lock, list);
- if (lock->node == dead_node) {
- list_del(&lock->list);
- kfree(lock);
- }
- }
- list_for_each_safe(iter2, tmpiter, &res->blocked) {
- lock = list_entry (iter2, dlm_lock, list);
- if (lock->node == dead_node) {
- list_del(&lock->list);
- kfree(lock);
- }
- }
- }
- spin_unlock(&res->spinlock);
- }
- }
-
- if (!locked)
- spin_unlock(&dlm->spinlock);
-}
-
-
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data)
-{
- //int ret;
- //struct inode *group = ptr1;
- //struct inode *node = ptr2;
- dlm_ctxt *dlm = data;
-
- spin_lock(&dlm->spinlock);
-
- if (!test_bit(idx, dlm->node_map))
- dlmprintk("node %u already removed from nodemap!\n", idx);
- else {
- dlmprintk("node %u being removed from nodemap!\n", idx);
- clear_bit(idx, dlm->node_map);
- }
-
- if (test_bit(idx, dlm->recovery_map))
- dlmprintk("node %u already added to recovery map!\n", idx);
- else {
- set_bit(idx, dlm->recovery_map);
- dlm_do_local_recovery_cleanup(dlm, idx, 1);
- }
- spin_unlock(&dlm->spinlock);
-}
-
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data)
-{
- //struct inode *group = ptr1;
- //struct inode *node = ptr2;
- dlm_ctxt *dlm = data;
-
- spin_lock(&dlm->spinlock);
-
- if (test_bit(idx, dlm->recovery_map)) {
- dlmprintk("BUG!!! node up message on node in recovery (%u)!!!\n", idx);
- } else {
- if (test_bit(idx, dlm->node_map))
- dlmprintk("node %u already in node map!!!\n", idx);
- else {
- dlmprintk("node %u being added to node map!!!\n", idx);
- set_bit(idx, dlm->node_map);
- }
- }
-
- spin_unlock(&dlm->spinlock);
-}
-
-int __dlm_hb_node_dead(dlm_ctxt *dlm, int node)
-{
- if (test_bit(node, dlm->recovery_map))
- return 1;
- return 0;
-}
-
-int __dlm_hb_node_up(dlm_ctxt *dlm, int node)
-{
- if (test_bit(node, dlm->node_map))
- return 1;
- return 0;
-}
-
-int dlm_hb_node_dead(dlm_ctxt *dlm, int node)
-{
- int ret;
- spin_lock(&dlm->spinlock);
- ret = __dlm_hb_node_dead(dlm, node);
- spin_unlock(&dlm->spinlock);
- return ret;
-}
-
-int dlm_hb_node_up(dlm_ctxt *dlm, int node)
-{
- int ret;
- spin_lock(&dlm->spinlock);
- ret = __dlm_hb_node_up(dlm, node);
- spin_unlock(&dlm->spinlock);
- return ret;
-}
-
-u16 dlm_pick_recovery_master(dlm_ctxt *dlm, u16 *new_dead_node)
-{
- u16 master = 0;
-#if 0
- dlm_status ret;
- dlm_lockstatus lksb;
-
- ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
- DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast);
-
- if (ret == DLM_NORMAL) {
- // I am master
- // send message to all nodes saying that I am beginning a recovery session for node XX,
- // then call dlmunlock???
-
- } else if (ret == DLM_NOTQUEUED) {
- // another node is master
- // wait on reco.new_master != NM_INVALID_SLOT_NUM
- }
-
- // at this point, every node in this domain should have reco.new_master and .dead_node set, even
- // if they have not discovered the dead node on their own
- //
- //
- // atomic_set(&dlm->reco.thread.woken, 0);
- // 232 status = util_wait_atomic_eq(&dlm->reco.thread.thread_wq,
- // 233 &dlm->reco.thread.woken,
- // 234 1, DLM_RECOVERY_THREAD_MS);
- //
-#endif
- return master;
-}
Copied: trunk/fs/ocfs2/cluster/dlmrecovery.c (from rev 1811, trunk/cluster/dlmrecovery.c)
Deleted: trunk/fs/ocfs2/cluster/dlmthread.c
===================================================================
--- trunk/cluster/dlmthread.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmthread.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,326 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmthread.c
- *
- * standalone DLM module
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-extern spinlock_t dlm_domain_lock;
-extern struct list_head dlm_domains;
-extern u16 dlm_global_index;
-
-static int dlm_thread(void *data);
-
-#define dlm_lock_is_remote(dlm, lock) ((lock)->node != (dlm)->group_index)
-
-/*
- * DLM THREAD
- */
-
-void dlm_shuffle_lists(dlm_ctxt *dlm, dlm_lock_resource *res)
-{
- dlm_lock *lock, *target;
- struct list_head *iter, *tmpiter;
- LIST_HEAD(bast_list);
- struct list_head *head;
- s8 hi;
-
- dlmprintk("shuffle res %*s\n", res->lockname.len, res->lockname.name);
-
- spin_lock(&res->spinlock);
-
-#if 0
- {
- int g=0, c=0, b=0;
- list_for_each(iter, &res->granted) {
- g++;
- }
- list_for_each(iter, &res->converting) {
- c++;
- }
- list_for_each(iter, &res->blocked) {
- b++;
- }
- dlmprintk("(%d) granted: %d, converting: %d, blocked: %d\n",
- current->pid, g, c, b);
- }
-#endif
-
-converting:
- if (list_empty(&res->converting))
- goto blocked;
- dlmprintk("res %*s has locks on a convert queue\n", res->lockname.len,
- res->lockname.name);
-
- target = list_entry(res->converting.next, dlm_lock, list);
- if (target->convert_type == LKM_IVMODE) {
- dlmprintk0("eeek!!! converting a lock with no "
- "convert_type!!!!\n");
- BUG();
- }
- head = &res->granted;
- list_for_each(iter, head) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock==target)
- continue;
- if (!dlm_lock_compatible(lock->type, target->convert_type)) {
- if (lock->highest_blocked == LKM_IVMODE)
- list_add(&lock->ast_list, &bast_list);
- if (lock->highest_blocked < target->convert_type)
- lock->highest_blocked = target->convert_type;
- }
- }
- head = &res->converting;
- list_for_each(iter, head) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock==target)
- continue;
- if (!dlm_lock_compatible(lock->type, target->convert_type)) {
- if (lock->highest_blocked == LKM_IVMODE)
- list_add(&lock->ast_list, &bast_list);
- if (lock->highest_blocked < target->convert_type)
- lock->highest_blocked = target->convert_type;
- }
- }
-
- /* we can convert the lock */
- if (list_empty(&bast_list)) {
- spin_lock(&target->spinlock);
- DLM_ASSERT(target->highest_blocked == LKM_IVMODE);
-
- dlmprintk("calling ast for converting lock: %*s, have: %d, "
- "granting: %d, node: %u\n", res->lockname.len,
- res->lockname.name, target->type,
- target->convert_type, target->node);
-
- target->type = target->convert_type;
- target->convert_type = LKM_IVMODE;
- list_del(&target->list);
- list_add_tail(&target->list, &res->granted);
-
- DLM_ASSERT(target->lksb);
- target->lksb->status = DLM_NORMAL;
-
- spin_unlock(&target->spinlock);
-
- if (dlm_do_ast(dlm, res, target) < 0)
- dlmprintk0("eek\n");
- /* go back and check for more */
- goto converting;
- }
-
-blocked:
- if (list_empty(&res->blocked)) {
- goto basts;
- }
- target = list_entry(res->blocked.next, dlm_lock, list);
-
- head = &res->granted;
- list_for_each(iter, head) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock==target)
- continue;
- if (!dlm_lock_compatible(lock->type, target->type)) {
- if (lock->highest_blocked == LKM_IVMODE)
- list_add(&lock->ast_list, &bast_list);
- if (lock->highest_blocked < target->type)
- lock->highest_blocked = target->type;
- }
- }
-
- head = &res->converting;
- list_for_each(iter, head) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock==target)
- continue;
- if (!dlm_lock_compatible(lock->type, target->type)) {
- if (lock->highest_blocked == LKM_IVMODE)
- list_add(&lock->ast_list, &bast_list);
- if (lock->highest_blocked < target->type)
- lock->highest_blocked = target->type;
- }
- }
-
- /* we can grant the blocked lock (only
- * possible if converting list empty) */
- if (list_empty(&bast_list)) {
- spin_lock(&target->spinlock);
- DLM_ASSERT(target->highest_blocked == LKM_IVMODE);
-
- dlmprintk("calling ast for blocked lock: %*s, granting: %d, "
- "node: %u\n", res->lockname.len, res->lockname.name,
- target->type, target->node);
-
- // target->type is already correct
- list_del(&target->list);
- list_add_tail(&target->list, &res->granted);
-
- DLM_ASSERT(target->lksb);
- target->lksb->status = DLM_NORMAL;
-
- spin_unlock(&target->spinlock);
-
- if (dlm_do_ast(dlm, res, target) < 0)
- dlmprintk0("eek\n");
- /* go back and check for more */
- goto converting;
- }
-
-basts:
- list_for_each_safe(iter, tmpiter, &bast_list) {
- lock = list_entry(iter, dlm_lock, ast_list);
- spin_lock(&lock->spinlock);
- DLM_ASSERT(lock->highest_blocked > LKM_IVMODE);
- hi = lock->highest_blocked;
- lock->highest_blocked = LKM_IVMODE;
- list_del(&lock->ast_list);
- spin_unlock(&lock->spinlock);
-
- dlmprintk("delivering a bast for this lockres (blocked = %d\n",
- hi);
- if (dlm_do_bast(dlm, res, lock, hi) < 0)
- dlmprintk0("eeek\n");
- }
- spin_unlock(&res->spinlock);
-}
-
-
-/* must have NO locks when calling this */
-void dlm_kick_thread(dlm_ctxt *dlm, dlm_lock_resource *res)
-{
- if (res) {
- spin_lock(&dlm->spinlock);
- spin_lock(&res->spinlock);
- if (!(res->state & DLM_LOCK_RES_DIRTY)) {
- list_add_tail(&res->dirty, &dlm->dirty_list);
- res->state |= DLM_LOCK_RES_DIRTY;
- }
- spin_unlock(&res->spinlock);
- spin_unlock(&dlm->spinlock);
- }
-
- /* wake the dlm thread */
- atomic_set(&dlm->thread.woken, 1);
- wake_up(&dlm->thread.thread_wq);
-}
-
-/* Launch the NM thread for the mounted volume */
-int dlm_launch_thread(dlm_ctxt *dlm)
-{
- dlmprintk0("starting dlm thread...\n");
- dlm->thread.pid = kernel_thread (dlm_thread, dlm,
- CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND);
- if (dlm->thread.pid < 0) {
- dlmprintk("unable to launch dlm thread, error=%d",
- dlm->thread.pid);
- return -EINVAL;
- }
- dlmprintk("dlm thread running for %s...\n", dlm->name);
- return 0;
-}
-
-void dlm_complete_thread(dlm_ctxt *dlm)
-{
- dlmprintk0 ("waiting for dlm thread to exit....");
- send_sig (SIGINT, dlm->thread.task, 0);
- wait_for_completion (&dlm->thread.complete);
- dlmprintk0 ("dlm thread exited\n");
- dlm->thread.task = NULL;
-}
-
-
-
-
-static int dlm_thread(void *data)
-{
- struct list_head *iter, *tmpiter;
- dlm_lock_resource *res;
- dlm_ctxt *dlm = data;
-
- util_daemonize ("dlm_thread", strlen("dlm_thread"), 1);
- dlm->thread.task = current;
-
- while (1) {
- atomic_set(&dlm->thread.woken, 0);
-
- down_read(&dlm->recovery_sem);
- spin_lock(&dlm->spinlock);
- list_for_each_safe(iter, tmpiter, &dlm->dirty_list) {
- res = list_entry(iter, dlm_lock_resource, dirty);
- /* don't shuffle secondary queues */
- if (res->owner != dlm->group_index)
- continue;
- spin_lock(&res->spinlock);
- list_del(&res->dirty);
- res->state &= ~DLM_LOCK_RES_DIRTY;
- spin_unlock(&res->spinlock);
-
- dlm_shuffle_lists(dlm, res);
- }
- spin_unlock(&dlm->spinlock);
- up_read(&dlm->recovery_sem);
-
- wait_event_interruptible(dlm->thread.thread_wq,
- atomic_read(&dlm->thread.woken));
-
- if (signal_pending(current)) {
- dlmprintk("DLM thread got signal while waiting\n");
- break;
- }
- }
-
- flush_scheduled_work();
- complete (&dlm->thread.complete);
- dlmprintk0("quitting DLM thread!!!!!!\n");
- return 0;
-}
Copied: trunk/fs/ocfs2/cluster/dlmthread.c (from rev 1811, trunk/cluster/dlmthread.c)
Deleted: trunk/fs/ocfs2/cluster/dlmunlock.c
===================================================================
--- trunk/cluster/dlmunlock.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/dlmunlock.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,404 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmunlock.c
- *
- * underlying calls for unlocking locks
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/spinlock.h>
-
-
-#include "heartbeat.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "dlmmod.h"
-
-
-#define DLM_UNLOCK_FREE_LOCK 0x00000001
-#define DLM_UNLOCK_CALL_AST 0x00000002
-#define DLM_UNLOCK_REMOVE_LOCK 0x00000004
-#define DLM_UNLOCK_REGRANT_LOCK 0x00000008
-
-
-static dlm_status dlm_get_cancel_actions(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int *actions);
-static dlm_status dlm_get_unlock_actions(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int *actions);
-
-static dlm_status dlm_send_remote_unlock_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock,
- dlm_lockstatus *lksb,
- int flags);
-
-
-
-/*
- * locking:
- * caller needs: none
- * taken: res->spinlock and lock->spinlock taken and dropped
- * held on exit: none
- * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
- */
-dlm_status dlmunlock_common(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int flags, int *call_ast, int master_node)
-{
- dlm_status status;
- int actions = 0;
-
- dlmprintk0("\n");
-
- if (master_node)
- DLM_ASSERT(res->owner == dlm->group_index);
- else
- DLM_ASSERT(res->owner != dlm->group_index);
-
- spin_lock(&res->spinlock);
- if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
- if (!master_node) {
- /* TODO: should we return -EAGAIN or something here? */
- dlmprintk0("lockres in progress! eek!\n");
- }
-#warning THIS CAN SLEEP!!!
- __dlm_wait_on_lockres(res);
- res->state |= DLM_LOCK_RES_IN_PROGRESS;
- }
- spin_lock(&lock->spinlock);
-
- if (res->state & DLM_LOCK_RES_RECOVERING) {
- /* !!!!! */
- status = DLM_RECOVERING;
- goto leave;
- }
-
- /* according to spec and opendlm code
- * flags & LKM_CANCEL != 0: must be converting or blocked
- * flags & LKM_CANCEL == 0: must be granted
- * iow, to unlock a converting lock, you must first LKM_CANCEL
- * the convert, then call the unlock again with no LKM_CANCEL
- */
-
- if (flags & LKM_CANCEL) {
- status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions);
- } else {
- status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
- if (master_node && status == DLM_NORMAL) {
- /* make the final update to the lvb */
- if (lksb->flags & DLM_LKSB_PUT_LVB)
- memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
- }
- }
-
- if (status != DLM_NORMAL)
- goto leave;
-
- if (!master_node) {
- /* drop locks and send message */
- spin_unlock(&lock->spinlock);
- spin_unlock(&res->spinlock);
- status = dlm_send_remote_unlock_request(dlm, res, lock,
- lksb, flags);
- spin_lock(&res->spinlock);
- spin_lock(&lock->spinlock);
- }
-
- if (actions & DLM_UNLOCK_REMOVE_LOCK)
- list_del(&lock->list);
- if (actions & DLM_UNLOCK_REGRANT_LOCK)
- list_add_tail(&lock->list, &res->granted);
-
-
-leave:
- res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- spin_unlock(&lock->spinlock);
- spin_unlock(&res->spinlock);
- wake_up(&res->wq);
-
-
- if (actions & DLM_UNLOCK_FREE_LOCK) {
-#warning this must change to proper refcounting
- /* TODO: refcounting... tho for now this will work because
- * the middle layer is keeping track of everything */
- kfree(lock);
- lksb->lockid = NULL;
- }
- if (actions & DLM_UNLOCK_CALL_AST)
- *call_ast = 1;
-
- /* if cancel or unlock succeeded, lvb work is done */
- if (status == DLM_NORMAL)
- lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
-
- return status;
-}
-
-
-
-
-
-/*
- * locking:
- * caller needs: none
- * taken: none
- * held on exit: none
- * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
- */
-static dlm_status dlm_send_remote_unlock_request(dlm_ctxt *dlm,
- dlm_lock_resource *res,
- dlm_lock *lock,
- dlm_lockstatus *lksb,
- int flags)
-{
- struct inode *inode = NULL;
- dlm_unlock_lock unlock;
- int tmpret;
- dlm_status ret;
- int status = 0;
- struct iovec iov[2];
- size_t iovlen = 1;
-
-
- dlmprintk0("\n");
-
- memset(&unlock, 0, sizeof(unlock));
- unlock.node_idx = dlm->group_index;
- unlock.flags = flags;
- unlock.cookie = lock->cookie;
- unlock.namelen = res->lockname.len;
- strncpy(unlock.name, res->lockname.name, unlock.namelen);
-
- iov[0].iov_len = sizeof(dlm_unlock_lock);
- iov[0].iov_base = &unlock;
-
- if (flags & LKM_PUT_LVB) {
- /* extra data to send if we are updating lvb */
- iov[1].iov_len = DLM_LVB_LEN;
- iov[1].iov_base = lock->lksb->lvb;
- iovlen++;
- }
-
- ret = DLM_NOLOCKMGR;
- lksb->status = DLM_NOLOCKMGR;
- inode = nm_get_group_node_by_index(dlm->group, res->owner);
- if (inode) {
- dlm_unlock_lock_to_net(&unlock);
- tmpret = net_send_message_iov(DLM_UNLOCK_LOCK_MSG, dlm->key,
- iov, iovlen, inode, &status);
- if (tmpret >= 0) {
- // successfully sent and received
- if (status == DLM_CANCELGRANT)
- ret = DLM_NORMAL;
- else
- ret = status;
- lksb->status = status;
- } else {
- dlmprintk("error occurred in net_send_message: %d\n",
- tmpret);
- ret = dlm_err_to_dlm_status(tmpret);
- lksb->status = ret;
- }
- iput(inode);
- }
-
- return ret;
-}
-
-/*
- * locking:
- * caller needs: none
- * taken: takes and drops res->spinlock
- * held on exit: none
- * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
- * return value from dlmunlock_master
- */
-int dlm_unlock_lock_handler(net_msg *msg, u32 len, void *data)
-{
- dlm_ctxt *dlm = data;
- dlm_unlock_lock *unlock = (dlm_unlock_lock *)msg->buf;
- dlm_lock_resource *res;
- struct list_head *iter;
- dlm_lock *lock = NULL;
- dlm_status status = DLM_NORMAL;
- int found = 0, i;
- dlm_lockstatus *lksb = NULL;
- int ignore;
- struct qstr lockname;
- u32 flags;
- struct list_head *queue;
-
- dlm_unlock_lock_to_host(unlock);
- lockname.name = unlock->name;
- lockname.len = unlock->namelen;
- flags = unlock->flags;
-
- if (flags & LKM_GET_LVB) {
- dlmprintk0("bad args! GET_LVB specified on unlock!\n");
- return DLM_BADARGS;
- }
-
- if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) {
- dlmprintk0("bad args! cannot modify lvb on a CANCEL "
- "request!\n");
- return DLM_BADARGS;
- }
-
- dlmprintk("lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
-
- lockname.hash = full_name_hash(lockname.name, lockname.len);
-
- status = DLM_IVLOCKID;
- res = dlm_lookup_lock(dlm, &lockname);
- if (!res)
- goto not_found;
-
- queue=&res->granted;
- found = 0;
- spin_lock(&res->spinlock);
- for (i=0; i<3; i++) {
- list_for_each(iter, queue) {
- lock = list_entry(iter, dlm_lock, list);
- if (lock->cookie == unlock->cookie &&
- lock->node == unlock->node_idx) {
- found = 1;
- break;
- }
- }
- if (found)
- break;
- /* scan granted -> converting -> blocked queues */
- queue++;
- }
- spin_unlock(&res->spinlock);
- if (!found)
- goto not_found;
-
- /* lock was found on queue */
- lksb = lock->lksb;
- /* unlockast only called on originating node */
- if (flags & LKM_PUT_LVB) {
- lksb->flags |= DLM_LKSB_PUT_LVB;
- memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN);
- }
-#warning BUG! THIS CAN SLEEP!!!
- /* so either we should respond with EAGAIN in dlmunlock_master
- * and skip the __dlm_wait_on_lockres, or this message type
- * should be dispatched */
- status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore);
- if (flags & LKM_PUT_LVB)
- lksb->flags &= ~DLM_LKSB_PUT_LVB;
-
-not_found:
- if (!found)
- dlmprintk("failed to find lock to unlock! cookie=%llu\n",
- unlock->cookie);
- else {
- /* send the lksb->status back to the other node */
- status = lksb->status;
- }
-
- return status;
-}
-
-
-static dlm_status dlm_get_cancel_actions(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int *actions)
-{
- dlm_status status;
-
- if (dlm_lock_on_list(&res->blocked, lock)) {
- /* cancel this outright */
- lksb->status = DLM_NORMAL;
- status = DLM_NORMAL;
- *actions = (DLM_UNLOCK_FREE_LOCK |
- DLM_UNLOCK_CALL_AST |
- DLM_UNLOCK_REMOVE_LOCK);
- } else if (dlm_lock_on_list(&res->converting, lock)) {
- /* cancel the request, put back on granted */
- lksb->status = DLM_NORMAL;
- status = DLM_NORMAL;
- *actions = (DLM_UNLOCK_CALL_AST |
- DLM_UNLOCK_REMOVE_LOCK |
- DLM_UNLOCK_REGRANT_LOCK);
- } else if (dlm_lock_on_list(&res->granted, lock)) {
- /* too late, already granted. DLM_CANCELGRANT */
- lksb->status = DLM_CANCELGRANT;
- status = DLM_NORMAL;
- *actions = DLM_UNLOCK_CALL_AST;
- } else {
- /* err. um. eek! */
- dlmprintk0("lock to cancel is not on any list! bug!\n");
- lksb->status = DLM_IVLOCKID;
- status = DLM_IVLOCKID;
- *actions = 0;
- }
- return status;
-}
-
-static dlm_status dlm_get_unlock_actions(dlm_ctxt *dlm, dlm_lock_resource *res,
- dlm_lock *lock, dlm_lockstatus *lksb,
- int *actions)
-{
- dlm_status status;
-
- /* unlock request */
- if (!dlm_lock_on_list(&res->granted, lock)) {
- lksb->status = DLM_DENIED;
- status = DLM_DENIED;
- *actions = 0;
- } else {
- /* unlock granted lock */
- lksb->status = DLM_NORMAL;
- status = DLM_NORMAL;
- *actions = (DLM_UNLOCK_FREE_LOCK |
- DLM_UNLOCK_CALL_AST |
- DLM_UNLOCK_REMOVE_LOCK);
- }
- return status;
-}
-
Copied: trunk/fs/ocfs2/cluster/dlmunlock.c (from rev 1811, trunk/cluster/dlmunlock.c)
Deleted: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/cluster/heartbeat.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/heartbeat.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,1002 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * heartbeat.c
- *
- * Keeps track of alive nodes in the cluster.
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-
-#include "warning_hack.h"
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/vfs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#include <linux/version.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/in.h>
-#include <linux/module.h>
-
-#include <linux/linkage.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/unistd.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-
-#include <asm/uaccess.h>
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include "heartbeat.h"
-#include "tcp.h"
-#include "nodemanager.h"
-
-#include "compat_libfs.h"
-
-#ifndef __user
-#define __user
-#endif
-
-
-static void hb_teardown(void);
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u16 idx);
-static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u16 idx);
-static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u16 idx);
-static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u16 idx);
-static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits,
- u32 blocks, u64 start);
-static ssize_t write_disk(struct file *file, char *buf, size_t size);
-static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx);
-static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx);
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx);
-static int hb_do_disk_heartbeat(void *page);
-static int hb_thread(void *data);
-static void hb_complete_thread(void);
-static void hb_kick_thread(void);
-static int hb_launch_thread(void);
-static inline int hb_wait_on_callback_state(int type);
-
-
-
/* globals */
static spinlock_t hb_lock = SPIN_LOCK_UNLOCKED;		/* guards the two group lists below */
static LIST_HEAD(hb_net_groups);			/* groups using network heartbeat */
static LIST_HEAD(hb_disk_groups);			/* groups polled by hb_thread */
static int hb_callback_state[HB_NUM_CB];		/* HB_CB_STATE_* per callback type */
struct list_head hb_callbacks[HB_NUM_CB];		/* registered hb_callback_func chains */
static spinlock_t hb_cb_lock = SPIN_LOCK_UNLOCKED;	/* guards callback state + chains */
static struct task_struct *hb_task = NULL;		/* hb_thread's task, NULL when stopped */
static atomic_t hb_thread_woken = ATOMIC_INIT(0);	/* set by hb_kick_thread */
static DECLARE_WAIT_QUEUE_HEAD(hb_thread_wait_queue);
static struct completion hb_complete;			/* signalled when hb_thread exits */
static int hb_pid = -1;

static wait_queue_head_t hb_cb_wq;			/* waiters for a frozen callback chain */
static atomic_t hb_cb_ready = ATOMIC_INIT(0);

/* debug printks; swap which #if block is live to silence them */
//#if 0
#define hbprintk(x, arg...) printk("(hb:%d) " x, current->pid, ##arg)
#define hbprintk0(x) printk("(hb:%d) " x, current->pid)
//#else
#if 0
#define hbprintk(x, arg...)
#define hbprintk0(x)
#endif
-
-
-
/* b_end_io handler for heartbeat block I/O: record the result in the
 * buffer's uptodate flag and release the buffer lock so waiters in
 * wait_on_buffer() can proceed. */
static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate) {
		hbprintk("eek! EIO!\n");
		clear_buffer_uptodate(bh);
	} else {
		set_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}
-
-
-
/* A node's heartbeat stopped: run the HB_NODE_DOWN_CB chain.
 * The node is deliberately NOT removed from the group here (see the
 * commented-out nm_remove_node_from_group call). Always returns 0. */
static int hb_do_node_down(struct inode *group, struct inode *node, int idx)
{
	//int ret;
	hbprintk("hb_do_node_down: group=%lu, node=%lu\n", group->i_ino,
		 node->i_ino);
	hbprintk("NOT removing node from group\n");
	//ret = nm_remove_node_from_group(group, node);
	hb_do_callbacks(HB_NODE_DOWN_CB, group, node, idx);
	return 0;
}
-
/* A node started heartbeating: run the HB_NODE_UP_CB chain.
 * Always returns 0. */
static int hb_do_node_up(struct inode *group, struct inode *node, int idx)
{
	hbprintk("hb_do_node_up: group=%lu, node=%lu\n", group->i_ino,
		 node->i_ino);
	hb_do_callbacks(HB_NODE_UP_CB, group, node, idx);
	return 0;
}
-
/* Submit one heartbeat block I/O and kick the device queue so it is
 * issued immediately instead of waiting for an unplug. */
static inline void hb_submit_bh(int rw, struct buffer_head *bh)
{
#if 0
	unsigned long long blocknr = bh->b_blocknr;

	hbprintk("submit_bh: rw=%s, blocknr=%llu, mapped=%s\n",
		 rw==WRITE?"write":"read", blocknr,
		 buffer_mapped(bh) ? "yes" : "no");
#endif
	submit_bh(rw, bh);
	blk_run_address_space(bh->b_bdev->bd_inode->i_mapping);
}
-
-/*
- * hb_init_disk_hb_group() sets disk->blocksize_bits which defines the block
- * size that we'll use to calculate block offsets. The blocksize of the
- * device might change under us, though. this detects when that happens
- * and tries to lookup the bh again with the newer blocksize. The exciting
- * wrinkle here is that its fatal to call __getblk() with the wrong block
- * size in some 2.6 kernels.
- *
- * We only ever use a few bytes of the block so it is ok that we return
- * a smaller bh than what is implied by blocksize_bits. We're just fixing
- * up addressing here.
- */
-static struct buffer_head *hb_getblk(int orig_blkno, cluster_disk *disk)
-{
- /* XXX getblk() takes an int block in 2.4 :/ */
- int blkno;
- int bits, dev_bits;
- struct buffer_head *bh = NULL;
-
- bits = disk->blocksize_bits;
- blkno = orig_blkno;
-
- dev_bits = ocfs_dev_bits(disk->dev);
- if (dev_bits < 0)
- goto out;
-
- if (dev_bits < bits)
- blkno <<= bits - dev_bits;
- else if (dev_bits > bits)
- blkno >>= dev_bits - bits;
-
- bh = getblk(disk->dev, blkno, 1 << dev_bits);
-out:
- return bh;
-}
-
-static struct buffer_head *hb_get_locked_mapped(hb_disk_slot *slot,
- int ino,
- cluster_disk *disk)
-{
- struct buffer_head *bh = NULL;
-
- bh = slot->bh;
- if (bh) {
- lock_buffer(bh);
- if (buffer_mapped(bh))
- goto out;
- slot->bh = NULL;
- unlock_buffer(bh);
- brelse(bh);
- }
-
- slot->bh = hb_getblk(ino + disk->start_block, disk);
- if (slot->bh) {
- bh = slot->bh;
- lock_buffer(bh);
- }
-out:
- return bh;
-}
-
/* One pass of the disk heartbeat. For every disk-heartbeat group:
 * write our own timestamp block, read every other node's, then compare
 * timestamps with the previous pass to decide which nodes came up or
 * went down, firing the node-up/down callbacks accordingly.
 * `page` is one page of scratch memory used for the live/dead inode
 * pointer arrays. Always returns 0. */
static int hb_do_disk_heartbeat(void *page)
{
	nm_group_inode_private *priv;
	struct inode *group, *node;
	struct list_head *iter;
	struct buffer_head *bh;
	hb_disk_slot *slot;
	hb_disk_heartbeat_block *hb_block;
	int rw, ino, idx, ret, i;
	struct inode **dead_nodes, **live_nodes;
	LIST_HEAD(tmplist);
	cluster_disk *disk;
	u64 cputime;

	// NM_MAX_NODES is 255
	/* NOTE(review): 256 pointers per array exactly fills a 4K page
	 * with 8-byte pointers -- confirm for other page sizes and
	 * pointer widths. */
	dead_nodes = page;
	live_nodes = page + (sizeof(struct inode *) * 256);

	/* steal the whole disk-group list so it can be walked unlocked */
	spin_lock(&hb_lock);
	list_splice_init(&hb_disk_groups, &tmplist);
	spin_unlock(&hb_lock);

	list_for_each(iter, &tmplist) {
		priv = list_entry(iter, nm_group_inode_private, disk_list);
		group = priv->inode;
		disk = &priv->disk;

		/* clears both live_nodes[] and dead_nodes[] */
		memset(page, 0, PAGE_SIZE);
		down(&group->i_sem);

		/* pass 1: issue I/O for every slot -- a write of our own
		 * timestamp block, a read of everyone else's */
		idx = 0;
		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {

			node = slot->inode;
			if (!node) {
				hbprintk("no inode in slot %d!\n", idx);
				idx++;
				continue;
			}
			ino = nm_get_node_global_index(node);

			bh = hb_get_locked_mapped(slot, ino, disk);
			if (bh == NULL) {
				hbprintk("getblk failed in slot %d!\n",
					 idx);
				idx++;
				continue;
			}

			if (ino == nm_this_node(group)) {
				/* our slot: stamp the current time */
				memset(bh->b_data, 0, bh->b_size);
				hb_block= (hb_disk_heartbeat_block *)bh->b_data;
				BUG_ON(bh->b_size < sizeof(*hb_block));

				/* TODO: time stuff */
				cputime = OCFS_CURRENT_SECONDS;
				/* 0 is reserved as "never heartbeated" */
				if (!cputime)
					cputime = 1;
				hb_block->time = cpu_to_le64(cputime);

				set_buffer_uptodate(bh);
				clear_buffer_dirty(bh);
				rw = WRITE;
			} else {
				clear_buffer_uptodate(bh);
				rw = READ;
			}

			/* hb_end_buffer_io_sync unlocks the buffer on
			 * completion; pass 2 waits on it */
			bh->b_end_io = hb_end_buffer_io_sync;
			hb_submit_bh(rw, bh);
			idx++;
		}

		/* pass 2: wait for the I/O and compare timestamps */
		idx = 0;
		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
			bh = slot->bh;
			if (!bh) {
				hbprintk("no bh in slot %d!\n", idx);
				idx++;
				continue;
			}

			node = slot->inode;
			if (!node) {
				hbprintk("no inode in slot %d!\n", idx);
				idx++;
				continue;
			}

			ino = nm_get_node_global_index(node);

			wait_on_buffer(bh);
			hb_block = (hb_disk_heartbeat_block *)bh->b_data;

			/* TODO: time stuff */
			cputime = le64_to_cpu(hb_block->time);
			if (slot->last_time != cputime) {
				/* timestamp moved: the node is alive */
				if (slot->state == HB_NODE_STATE_INIT) {
					hbprintk("first time for this node!\n");
					live_nodes[ino] = node;
					slot->state = HB_NODE_STATE_UP;
				}
				node->i_atime = mk_inode_time(cputime, 0);
				slot->last_time = cputime;
				slot->margin = HB_DISK_MARGIN;
				hb_do_callbacks(HB_NODE_RESPONDED_CB,
						group, node, idx);
			} else {
				/* no change: burn one miss off the margin */
				slot->margin--;
				hbprintk("node %d missed. margin=%d\n",
					 ino, slot->margin);
			}

			if (ino != nm_this_node(group) && slot->margin <= 0) {
				hbprintk("node %d JUST DIED!!!!\n", ino);
				dead_nodes[ino] = node;
				slot->state = HB_NODE_STATE_DOWN;
			}
			idx++;
		}

		up(&group->i_sem);

		/* i_sem is dropped before running node-up/down: the
		 * callbacks may need to modify the group and take i_sem
		 * themselves. */
		for (i=0; i<NM_MAX_NODES; i++) {
			if (live_nodes[i])
				ret = hb_do_node_up(group, live_nodes[i], i);
			else if (dead_nodes[i])
				ret = hb_do_node_down(group, dead_nodes[i], i);
		}
	}

	/* give the groups back */
	spin_lock(&hb_lock);
	list_splice(&tmplist, &hb_disk_groups);
	spin_unlock(&hb_lock);
	return 0;
}
-
-
-static int hb_thread(void *data)
-{
- int status;
- void *page;
-
- page = (void *) __get_free_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- util_daemonize ("hb_thread", strlen("hb_thread"), 1);
- hb_task = current;
-
- while (1) {
- status = hb_do_disk_heartbeat(page);
-
- atomic_set(&hb_thread_woken, 0);
- status = util_wait_atomic_eq(&hb_thread_wait_queue,
- &hb_thread_woken,
- 1, HB_THREAD_MS);
-
- if (status == 0 || status == -ETIMEDOUT) {
-#if 0
- if (atomic_read(&hb_thread_woken))
- hbprintk("aha!!! hb thread woken!\n");
- else
- hbprintk("hb thread timed out waiting, "
- "running again\n");
-#endif
- continue;
- }
- hbprintk("hb thread got %d while waiting\n", status);
- break;
- }
-
- flush_scheduled_work();
- complete (&hb_complete);
- hbprintk("quitting hb thread!!!!!!\n");
- return 0;
-}
-
-
/* Wake hb_thread immediately so it runs another heartbeat pass now
 * rather than waiting out the HB_THREAD_MS timeout. */
static void hb_kick_thread(void)
{
	atomic_set(&hb_thread_woken, 1);
	wake_up(&hb_thread_wait_queue);
}
-
/* Launch the hb thread for the mounted volume */
/* Returns 0 on success, -EINVAL if the kernel thread could not be
 * spawned. hb_task is set by the thread itself once it runs. */
static int hb_launch_thread(void)
{
	hb_pid = -1;
	hb_task = NULL;
	init_completion (&hb_complete);

	hbprintk("starting hb thread...\n");
	hb_pid = kernel_thread (hb_thread, NULL,
				CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
	if (hb_pid < 0) {
		/* NOTE(review): message is missing a trailing newline */
		hbprintk("unable to launch hb thread, error=%d", hb_pid);
		return -EINVAL;
	}
	hbprintk("hb thread running...\n");
	return 0;
}
-
/* Ask hb_thread to exit (SIGINT interrupts its wait) and block until
 * it signals hb_complete.
 * NOTE(review): assumes hb_task is non-NULL, i.e. the thread launched
 * and has already set hb_task -- confirm callers never race this. */
static void hb_complete_thread(void)
{
	hbprintk ("waiting for hb thread to exit....");
	send_sig (SIGINT, hb_task, 0);
	wait_for_completion (&hb_complete);
	hbprintk ("hb thread exited\n");
	hb_task = NULL;
}
-
-
-
-
-
-
-
-static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits,
- u32 blocks, u64 start)
-{
- int ret = -EINVAL;
- cluster_disk *disk;
- nm_group_inode_private *priv;
-
- priv = group->u.generic_ip;
- if (!priv)
- goto leave;
-
- if (priv->state == NM_GROUP_READY)
- return 0;
-
- /* hold an extra ref as long as hb keeps track of the group */
- igrab(group);
-
- disk = &priv->disk;
- if (blocks > NM_MAX_NODES)
- blocks = NM_MAX_NODES;
- disk->dev = dev;
- disk->blocksize_bits = bits;
- disk->num_blocks = blocks;
- disk->start_block = start;
- util_init_rarray(&disk->slots, sizeof(hb_disk_slot));
-
- /* start allowing group additions */
- ret = nm_make_group_ready(group);
-
-leave:
- if (ret < 0)
- iput(group);
-
- return ret;
-}
-
-
/* Transaction-file write handler for the ".disk" file: parses an hb_op
 * from `buf` and either starts disk heartbeat on a group or returns a
 * group's live-node bitmap. The reply is written back into `buf`
 * ("<errno>: <message>" text, plus raw bitmap bytes for GET_NODE_MAP);
 * returns the number of reply bytes. */
static ssize_t write_disk(struct file *file, char *buf, size_t size)
{
	hb_op *data;
	struct inode *group = NULL;
	struct file *filp = NULL;
	dev_t dev;
	int ret, tmpret;
	nm_group_inode_private *priv;
	u32 tmpmap[8];

	hbprintk("write_disk\n");

	if (size < sizeof(*data))
		return -EINVAL;
	data = (hb_op *) buf; if (data->magic != HB_OP_MAGIC)
		return -EINVAL;

	switch (data->opcode)
	{
		case HB_OP_START_DISK_HEARTBEAT:
			/* only 512..4096-byte blocksizes are supported */
			if (data->bits < 9 || data->bits > 12) {
				ret = sprintf(buf, "%d: bad blocksize bits! %u",
					      -EINVAL, data->bits);
				break;
			}
			group = nm_get_group_by_num(data->group_num);
			if (!group || !group->u.generic_ip) {
				ret = sprintf(buf, "%d: bad group number! %u",
					      -EINVAL, data->group_num);
				break;
			}
			priv = group->u.generic_ip;
			if (strncmp(priv->disk.uuid, data->disk_uuid,
				    CLUSTER_DISK_UUID_LEN) != 0) {
				ret = sprintf(buf, "%d: bad disk uuid!",
					      -EINVAL);
				break;
			}
			/* NOTE(review): on the success path below this
			 * fget() reference is never fput() -- possibly
			 * intentional, to pin the device open while it is
			 * heartbeated. Confirm. */
			filp = fget(data->fd);
			if (!filp) {
				ret = sprintf(buf, "%d: bad fd!", -EINVAL);
				break;
			}
			dev = filp->f_dentry->d_inode->i_rdev;
			tmpret = hb_init_disk_hb_group(group, dev, data->bits,
						       data->blocks,
						       data->start);
			if (tmpret < 0) {
				fput(filp);
				ret = sprintf(buf, "%d: failed to init disk "
					      "heartbeat for group %u!",
					      -EINVAL, data->group_num);
			} else {
				ret = sprintf(buf, "0: disk heartbeat started "
					      "for group %u!", data->group_num);
			}
			break;

		case HB_OP_GET_NODE_MAP:
			group = nm_get_group_by_num(data->group_num);
			if (!group || !group->u.generic_ip) {
				ret = sprintf(buf, "%d: bad group number! %u",
					      -EINVAL, data->group_num);
				break;
			}

			/* reply is "0: " followed by the raw bitmap bytes */
			if ((ret = hb_fill_node_map(group, tmpmap,
						    sizeof(tmpmap))) == 0) {
				ret = sprintf(buf, "0: ");
				buf += ret;
				memcpy(buf, tmpmap, sizeof(tmpmap));
				ret += sizeof(tmpmap);
			} else {
				ret = sprintf(buf, "%d: error occurred in "
					      "hb_fill_node_map", ret);
			}
			break;

		default:
			ret = sprintf(buf, "%d: bad opcode! %u", -EINVAL,
				      data->opcode);
			break;
	}

	/* drop the ref taken by nm_get_group_by_num */
	if (group)
		iput(group);

	return ret;
}
-
-
-extern struct file_operations transaction_ops;
-
-/*----------------------------------------------------------------------------*/
-/*
- * populating the filesystem.
- */
/* Fill the "hb" superblock with the transaction-file tree (just the
 * ".disk" write-op file) and hang the write-op table off the sb.
 * Returns simple_fill_super's result; frees the table on failure. */
static int hb_fill_super(struct super_block * sb, void * data, int silent)
{
	int ret;
	TA_write_ops *ops;
	static struct tree_descr hb_files[] = {
		[HB_Disk] = {".disk", &transaction_ops, S_IWUSR},
		/* last one */ {""}
	};

	/* table sized for a single write op (HB_Disk) */
	ops = kmalloc(sizeof(TA_write_ops) + (1 * sizeof(TA_write_op *)),
		      GFP_KERNEL);
	if (!ops)
		return -ENOMEM;

	memset(ops, 0, sizeof(TA_write_ops) + (1 * sizeof(TA_write_op *)));
	ops->num_ops = HB_WriteOpArraySize;
	ops->write_op[HB_Disk] = write_disk;

	hbprintk("calling simple_fill_super...\n");
	ret = simple_fill_super(sb, 0x5551212f, hb_files);
	if (ret >= 0)
		TA_GENERIC_SB_MEMBER(sb) = ops;
	else
		kfree(ops);
	return ret;
}
-
/* blindly copied from ocfs2 */
/* 2.6 vs 2.4 registration of the single-instance "hb" filesystem. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static struct super_block *hb_get_sb(struct file_system_type *fs_type,
				     int flags,
				     const char *dev_name,
				     void *data)
{
	/* one shared superblock for every mount of "hb" */
	return get_sb_single(fs_type, flags, data, hb_fill_super);
}

static struct file_system_type hb_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "hb",
	.get_sb		= hb_get_sb,
	.kill_sb	= kill_anon_super,
};
#else
static struct super_block *hb_read_super(struct super_block *sb,
					 void *data,
					 int silent)
{
	hbprintk("welcome to hb_read_super!!!\n");
	return (hb_fill_super(sb, data, silent) < 0) ? NULL : sb;
}
static DECLARE_FSTYPE (hb_fs_type, "hb", hb_read_super, FS_SINGLE|FS_LITTER);
#endif
-
-/* TODO: make callbacks all return int */
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u16 idx)
-{
- hb_disk_slot *slot;
- struct inode *group = ptr1;
- struct inode *node = ptr2;
- cluster_disk *disk;
- nm_group_inode_private *priv;
- int ino, ret = 0;
- u64 block;
-
- hbprintk("hb_nm_group_node_add_cb: group=%lu, node=%lu, idx=%u\n",
- group->i_ino, node->i_ino, idx);
-
- down(&group->i_sem);
- priv = group->u.generic_ip;
- if (!priv) {
- hbprintk("eek! bad group inode!\n");
- goto leave;
- }
- disk = &priv->disk;
- if (disk->uuid[0]) {
- ret = util_resize_rarray(&disk->slots, idx+1);
- if (ret < 0) {
- hbprintk("eeeeeeek!!!! failed to resize disk state "
- "data\n");
- goto leave;
- }
-
- ino = nm_get_node_global_index(node);
- if (ino > disk->num_blocks) {
- hbprintk("disk heartbeat area does not have enough "
- "blocks!\n");
- goto leave;
- }
- block = ino + disk->start_block;
-
- slot = util_rarray_idx_to_slot(&disk->slots, idx);
- if (!slot) {
- hbprintk("eeeeeeek!!!! failed to get disk state data "
- "pointer: %d\n", idx);
- goto leave;
- }
- slot->inode = igrab(node);
- slot->last_time = 0;
- slot->margin = HB_INITIAL_DISK_MARGIN;
-#warning needs to change for 2.6
- slot->bh = hb_getblk(block, disk);
- slot->state = HB_NODE_STATE_INIT;
- } else {
- hbprintk("doing nothing for group add for non-disk heartbeat "
- "group\n");
- }
-
-leave:
- up(&group->i_sem);
- return;
-}
-
/* nm callback: node `idx` left `group`. Drop the disk slot's inode
 * reference and cached buffer (waiting out any in-flight heartbeat
 * I/O first), then clear the slot. */
static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u16 idx)
{
	hb_disk_slot *slot;
	struct inode *group = ptr1;
	struct inode *node = ptr2;
	cluster_disk *disk;
	nm_group_inode_private *priv;
	int ret = -EINVAL;

	hbprintk("hb_nm_group_node_del_cb: group=%lu, node=%lu, idx=%u\n",
		 group->i_ino, node->i_ino, idx);

	down(&group->i_sem);
	priv = group->u.generic_ip;
	if (!priv) {
		hbprintk("eek! bad group inode!\n");
		goto leave;
	}
	disk = &priv->disk;
	slot = util_rarray_idx_to_slot(&disk->slots, idx);
	if (!slot) {
		hbprintk("eeeeeeek!!!! failed to get disk state data "
			 "pointer: %d\n", idx);
		goto leave;
	}
	if (slot->inode!=node) {
		hbprintk("eeeeeeek!!!! node inode changed!\n");
		goto leave;
	}
	/* drop the reference taken by igrab in the add callback */
	iput(node);
	if (slot->bh) {
		/* make sure any in-flight heartbeat I/O is done */
		wait_on_buffer(slot->bh);
		brelse(slot->bh);
	}
	memset(slot, 0, sizeof(hb_disk_slot));
	ret = 0;
leave:

	up(&group->i_sem);
	hbprintk("hb_nm_group_node_del_cb done: %d\n", ret);
	return;
}
-
/* nm callback: a node appeared cluster-wide. Currently a no-op stub. */
static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u16 idx)
{
	//struct inode *node = ptr1;
}
-
/* nm callback: a new group appeared. Put it on the net-groups list
 * and, if it carries a disk uuid, on the disk-heartbeat list so
 * hb_thread starts polling it. */
static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u16 idx)
{
	struct inode *group = ptr1;
	nm_group_inode_private *priv;

	hbprintk("hb_nm_group_add_cb: group=%lu, idx=%u\n",
		 group->i_ino, idx);

	priv = group->u.generic_ip;
	if (!priv) {
		hbprintk("eek! bad group inode!\n");
		return;
	}

	spin_lock(&hb_lock);
	list_add_tail(&priv->net_list, &hb_net_groups);
	if (priv->disk.uuid[0]) {
		hbprintk("adding priv=%p inode=%p to disk group list\n",
			 priv, group);
		list_add_tail(&priv->disk_list, &hb_disk_groups);
	}
	spin_unlock(&hb_lock);
}
-
-enum {
- HB_CB_STATE_FROZEN = 0,
- HB_CB_STATE_READY
-};
-
-static inline int hb_wait_on_callback_state(int type)
-{
- while (hb_callback_state[type] == HB_CB_STATE_FROZEN) {
- spin_unlock(&hb_cb_lock);
- atomic_set(&hb_cb_ready, 0);
- if (util_wait_atomic_eq(&hb_cb_wq, &hb_cb_ready, 1, 0) ==
- -EINTR) {
- return -EINTR;
- }
- spin_lock(&hb_cb_lock);
- }
- return 0;
-}
-
-static int __init init_hb(void)
-{
- int retval=-1, i;
- hbprintk("loading heartbeat module: nodename is %s\n", nm_nodename);
-
- if (proc_mkdir("cluster/heartbeat", 0)) {
- // ???
- }
-
- //hb_net_timestamps = __get_free_page(GFP_KERNEL);
- //if (!hb_net_timestamps)
- // goto done;
-
- for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++)
- INIT_LIST_HEAD(&hb_callbacks[i]);
- init_waitqueue_head(&hb_cb_wq);
- for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++)
- hb_callback_state[i] = HB_CB_STATE_READY;
-
- if (nm_register_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb))
- goto done;
- if (nm_register_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb))
- goto done;
- if (nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb))
- goto done;
- if (nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb))
- goto done;
-
- if (hb_launch_thread() < 0)
- goto done;
-
- retval = register_filesystem(&hb_fs_type);
-done:
- if (retval)
- hb_teardown();
- return retval;
-}
-
/* Module unload: freeze every callback chain, stop the hb thread, then
 * drop the nm registrations, the proc entry, and the filesystem.
 * NOTE(review): chains are left FROZEN forever -- fine at exit, but
 * confirm the hb thread cannot be blocked inside hb_do_callbacks when
 * the freeze happens (it would wait on the state this loop just set). */
static void __exit exit_hb(void)
{
	int i;
	spin_lock(&hb_cb_lock);
	for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++) {
		hb_wait_on_callback_state(i);
		hb_callback_state[i] = HB_CB_STATE_FROZEN;
	}
	spin_unlock(&hb_cb_lock);

	hb_complete_thread();
	hb_teardown();
	unregister_filesystem(&hb_fs_type);
	hbprintk("unloading heartbeat module\n");
}
-
/* Undo init_hb's registrations. Called both from the init failure path
 * and from exit_hb; presumably nm_unregister_callback and
 * remove_proc_entry tolerate entries that were never registered --
 * TODO confirm. */
static void hb_teardown(void)
{
	nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
	nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
	nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
	nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
	remove_proc_entry("cluster/heartbeat", NULL);
	//if (hb_net_timestamps)
	//	kfree(hb_net_timestamps);
}
-
-MODULE_LICENSE("GPL");
-module_init(init_hb)
-module_exit(exit_hb)
-
-
/* Fill `map` (a bitmap buffer of `size` bytes) with the live nodes of
 * `group`: disk-heartbeat groups get one bit per slot in the UP state;
 * other groups get the nm slot bitmap copied wholesale. Returns 0, or
 * -EINVAL if a slot index exceeds the map.
 * NOTE(review): the `idx >= size-1` guard compares a BIT index against
 * the map's size in BYTES -- much stricter than the bitmap's real
 * capacity of size*8 bits. Confirm intent before widening callers. */
int hb_fill_node_map(struct inode *group, void *map, int size)
{
	hb_disk_slot *slot;
	int idx = 0;
	nm_group_inode_private *priv;

	priv = group->u.generic_ip;

	memset(map, 0, size);
	down(&group->i_sem);

	if (priv->disk.uuid[0]) {
		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
			if (idx >= size-1) {
				hbprintk("map size (%d) too small for "
					 "index (%d)\n", size, idx);
				up(&group->i_sem);
				return -EINVAL;
			}
			if (slot->state == HB_NODE_STATE_UP)
				set_bit(idx, map);
			idx++;
		}
	} else {
		hbprintk("filling straight from slot bitmap for non-disk "
			 "heartbeat group\n");
		memcpy(map, priv->slot_bitmap, size);
	}

	up(&group->i_sem);

	return 0;
}
EXPORT_SYMBOL(hb_fill_node_map);
-
-int hb_register_callback(int type, hb_cb_func *func, void *data, int priority)
-{
- hb_callback_func *f, *tmp;
- struct list_head *iter;
- int ret;
-
- if (type < HB_NODE_DOWN_CB || type >= HB_NUM_CB)
- return -EINVAL;
- f = kmalloc(sizeof(hb_callback_func), GFP_KERNEL);
- if (f == NULL)
- return -ENOMEM;
- memset(f, 0, sizeof(hb_callback_func));
- f->func = func;
- f->data = data;
- f->priority = priority;
-
- spin_lock(&hb_cb_lock);
- ret = hb_wait_on_callback_state(type);
- if (ret < 0) {
- spin_unlock(&hb_cb_lock);
- kfree(f);
- return ret;
- }
-
- list_for_each(iter, &hb_callbacks[type]) {
- tmp = list_entry (iter, hb_callback_func, list);
- if (priority < tmp->priority) {
- list_add_tail(&f->list, iter);
- spin_unlock(&hb_cb_lock);
- return 0;
- }
- }
- list_add_tail(&f->list, &hb_callbacks[type]);
- spin_unlock(&hb_cb_lock);
- return 0;
-}
-EXPORT_SYMBOL(hb_register_callback);
-
/* Remove the (func, data) registration from chain `type`. The walk is
 * done with the chain FROZEN rather than locked, which keeps
 * hb_do_callbacks and other (un)register calls out. Returns 0 if the
 * entry was found and freed, -EINVAL otherwise, or the error from
 * waiting for the chain to thaw. */
int hb_unregister_callback(int type, hb_cb_func *func, void *data)
{
	struct list_head *iter, *tmpiter;
	int ret = -EINVAL;
	hb_callback_func *f;

	if (type < HB_NODE_DOWN_CB || type >= HB_NUM_CB)
		return -EINVAL;

	spin_lock(&hb_cb_lock);
	ret = hb_wait_on_callback_state(type);
	if (ret < 0) {
		spin_unlock(&hb_cb_lock);
		return ret;
	}
	/* freeze the chain so it can be walked without the lock */
	hb_callback_state[type] = HB_CB_STATE_FROZEN;
	spin_unlock(&hb_cb_lock);

	list_for_each_safe(iter, tmpiter, &hb_callbacks[type]) {
		f = list_entry (iter, hb_callback_func, list);
		if (f->func == func && f->data == data) {
			list_del(&f->list);
			kfree(f);
			ret = 0;
			break;
		}
	}

	/* thaw and wake anyone waiting on the chain */
	spin_lock(&hb_cb_lock);
	hb_callback_state[type] = HB_CB_STATE_READY;
	atomic_set(&hb_cb_ready, 1);
	wake_up(&hb_cb_wq);
	spin_unlock(&hb_cb_lock);
	return ret;
}
EXPORT_SYMBOL(hb_unregister_callback);
-
-
/* Invoke every callback registered for event `type` with (ptr1, ptr2,
 * idx, registration data). The chain is FROZEN (not locked) while it
 * is walked, so callbacks may sleep. An interrupted wait drops the
 * event with only a printk. */
static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx)
{
	struct list_head *iter;
	hb_callback_func *f;
	int ret;

	spin_lock(&hb_cb_lock);
	ret = hb_wait_on_callback_state(type);
	if (ret < 0) {
		spin_unlock(&hb_cb_lock);
		hbprintk("missed hb callback(%d) due to EINTR!\n", type);
		return;
	}
	/* freeze the chain so it can be walked without the lock */
	hb_callback_state[type] = HB_CB_STATE_FROZEN;
	spin_unlock(&hb_cb_lock);

	list_for_each(iter, &hb_callbacks[type]) {
		f = list_entry (iter, hb_callback_func, list);
		(f->func) (ptr1, ptr2, idx, f->data);
	}

	/* thaw and wake anyone waiting on the chain */
	spin_lock(&hb_cb_lock);
	hb_callback_state[type] = HB_CB_STATE_READY;
	atomic_set(&hb_cb_ready, 1);
	wake_up(&hb_cb_wq);
	spin_unlock(&hb_cb_lock);
}
Copied: trunk/fs/ocfs2/cluster/heartbeat.c (from rev 1811, trunk/cluster/heartbeat.c)
Deleted: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/cluster/heartbeat.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/heartbeat.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,107 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * heartbeat.h
- *
- * Function prototypes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_HEARTBEAT_H
-#define CLUSTER_HEARTBEAT_H
-
-#include "ocfs2_heartbeat.h"
-
/* per-node heartbeat state, tracked in hb_disk_slot.state */
enum {
	HB_NODE_STATE_INIT = 0,	/* no timestamp observed yet */
	HB_NODE_STATE_DOWN,
	HB_NODE_STATE_UP
};

/* placeholder context type (currently unused) */
struct _heartbeat_ctxt
{
	int dummy;
};

/* per-node, per-group disk heartbeat slot */
typedef struct _hb_disk_slot
{
	struct inode *inode;		/* node inode, referenced via igrab */
	struct buffer_head *bh;		/* cached bh of the node's hb block */
	struct list_head list;
	unsigned long last_time;	/* timestamp seen on the last pass */
	u16 margin;			/* misses remaining before "down" */
	u16 state;			/* HB_NODE_STATE_* */
} hb_disk_slot;



#define HB_THREAD_MS 2000 // every 2 seconds


/* heartbeat transport types */
enum {
	HB_TYPE_DISK = 0,
	HB_TYPE_NET
};


/* callback stuff */

/* event types for hb_register_callback / hb_do_callbacks */
enum {
	HB_NODE_DOWN_CB = 0,
	HB_NODE_UP_CB,
	HB_NODE_RESPONDED_CB, // this one is very chatty
	HB_NUM_CB
};

/* callback signature: (group inode, node inode, slot idx, user data) */
typedef void (hb_cb_func)(struct inode *, struct inode *, int, void *);

/* one registered callback; chains are kept sorted by priority */
typedef struct _hb_callback_func
{
	struct list_head list;
	hb_cb_func *func;
	void *data;
	int priority;
} hb_callback_func;


/* inode numbers / write-op indices inside the "hb" filesystem */
enum {
	HB_Root = 1,
	HB_Disk,
	HB_WriteOpArraySize
};


// number of initial allowed misses
#define HB_INITIAL_DISK_MARGIN   60
#define HB_INITIAL_NET_MARGIN    60

// number of allowed misses in steady state
#define HB_DISK_MARGIN           30
#define HB_NET_MARGIN            30


int hb_unregister_callback(int type, hb_cb_func *func, void *data);
int hb_register_callback(int type, hb_cb_func *func, void *data, int priority);
int hb_fill_node_map(struct inode *group, void *map, int size);
-
-
-
-#endif /* CLUSTER_HEARTBEAT_H */
Copied: trunk/fs/ocfs2/cluster/heartbeat.h (from rev 1811, trunk/cluster/heartbeat.h)
Deleted: trunk/fs/ocfs2/cluster/nodemanager.c
===================================================================
--- trunk/cluster/nodemanager.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/nodemanager.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,1386 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * nodemanager.c
- *
- * totally lame static node management placeholder
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/vfs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#include <linux/namei.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/socket.h>
-#include <linux/inet.h>
-#include <linux/in.h>
-#include <linux/module.h>
-
-#include <linux/linkage.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/unistd.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/pagemap.h>
-#include <linux/hash.h>
-#include <linux/bitops.h>
-
-#include <asm/uaccess.h>
-
-#include "tcp.h"
-#include "dlmmod.h"
-#include "nodemanager.h"
-#include "heartbeat.h"
-
-#include "compat_libfs.h"
-
-#ifndef __user
-#define __user
-#endif
-
-
-/*
- * This nm module is similar to nfsd/nfsctl.c in that it uses
- * transaction files (in /proc/cluster/nm) to communicate with
- * the kernel module instead of ioctls or other means.
- *
- * Files involved:
- * /proc/cluster/nm/cluster - used to create/destroy cluster, adds
- * nodes/groups to the cluster, queries info
- * about the cluster
- * /proc/cluster/nm/group - adds/removes nodes from a group, queries
- * info about a group
- * /proc/cluster/nm/node - changes info for a node, queries info about
- * a node
- *
- * This nm implementation basically allows this node to live in exactly one
- * cluster. All "clustered" nodes that are known to this node should be
- * added to the cluster, and all nodes should see the same list of nodes in
- * the same order at all times. The "slot" number given to a node in this
- * global cluster list is fixed and never changes. Groups can be dynamically
- * created within a cluster (TODO: currently static only) and be made up of
- * one or more nodes (listed at most once) in the global list. A node may exist
- * in many groups. Also, a group may have an optional disk UUID which is simply
- * stored for later use by the heartbeat service. (The heartbeat service will
- * do disk heartbeating only for those groups with valid UUIDs.)
- *
- * USAGE:
- * For our purposes, the nm service can be autoloaded by an fstab entry or
- * manually through mount (mount -t nm none /proc/cluster/nm). Once that is
- * done, an init script (or single executable on an initrd) should be run to
- * create the static cluster info, possibly from a file like /etc/nm.conf or
- * similar. We should probably create a "dlm" or "everyone" group (with NO disk
- * heartbeating) so that the dlm service can be used with the network only.
- * This group should contain all known nodes. After this is done, the net, hb
- * and dlm modules can come up. The nm service is now ready for use, since
- * groups don't need to be created till later.
- *
- * A group services daemon can be written (by someone!? ;-) to run at this
- * point. Since the "dlm" group has everything it needs for full dlmming (since
- * it uses only network), the dlm itself can be used to arbitrate for group
- * creation, and additions/deletions from groups. Callbacks should be
- * registered with nm by other services that care on each of these events. For
- * instance, heartbeat should register a callback with nm for group creation,
- * and addition and deletion from a group so that it can make any necessary
- * changes to its heartbeating (primarily so that it can begin/end disk
- * heartbeat for any group/node that needs it).
- *
- * NOTE NOTE NOTE !!!!:
- * This is intended to be a quickie implementation. (translation: lame) I do
- * not want to step on anyone's toes who may have implemented something wayyy
- * better. If something out there "wins", we will plug into that instead. If
- * nothing really takes off, we at least have a (lame) reference to work off of.
- * However, since this implementation exists solely to make ocfs2 work, and one
- * of the major advantages of ocfs version 1 was ease of setup, we don't want
- * to move to something substantially more complicated than this (one conf
- * file).
- *
- */
-
-
-
/* globals */
nm_cluster cluster;			/* the single cluster this node lives in */
struct super_block *single_sb;		/* the one nm filesystem superblock */
char *nm_nodename;			/* this node's utsname, set at module load */
EXPORT_SYMBOL(nm_nodename);
static spinlock_t nm_lock = SPIN_LOCK_UNLOCKED;		/* cluster state/lookup lock */
static spinlock_t nm_cb_lock = SPIN_LOCK_UNLOCKED;	/* protects nm_callbacks lists */
struct list_head nm_callbacks[NM_NUM_CB];	/* registered callbacks per event type */


static void nm_teardown(void);
static int nm_create_cluster(char *buf);
static void nm_init_cluster(nm_cluster *cluster);
int nm_create_node(char *buf, nm_op *data);
int nm_name_cluster(char *buf, nm_op *data);
int nm_destroy_cluster(char *buf);
int nm_get_cluster_num_nodes(char *buf);
int nm_get_cluster_num_groups(char *buf);
int nm_get_node_info(char *buf, nm_op *data);
int nm_get_group_info(char *buf, nm_op *data);
nm_cluster *nm_get_cluster(void);
struct inode *nm_get_group_by_name(char *node_name);
struct inode *nm_get_node_by_name(char *node_name);
int nm_init(dlm_ctxt *dlm);
static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u16 idx);

/* support for adding files, dirs, hardlinks in /proc/cluster/nm/... */
extern struct file_operations simple_dir_operations;
extern struct inode_operations simple_dir_inode_operations;
extern struct file_operations transaction_ops;

static inline int nm_find_next_slot(void *bitmap, int max, int request);
static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);

/* transaction-file write handlers for the three control files */
static ssize_t write_node(struct file *file, char *buf, size_t size);
static ssize_t write_group(struct file *file, char *buf, size_t size);
static ssize_t write_cluster(struct file *file, char *buf, size_t size);

/* lockless variants; callers hold nm_lock */
static struct inode * __nm_get_group_by_num(u16 group_num);
static struct inode * __nm_get_node_by_num(u16 node_num);


static u16 nm_get_group_index(struct inode *group, struct inode *inode, struct dentry **child);

#define NM_HASH_BITS     7
#define NM_HASH_SIZE     (1 << NM_HASH_BITS)
#define NM_HASH_MASK     (NM_HASH_SIZE - 1)



#define nmprintk(x, arg...)    printk("(nm:%d) " x, current->pid, ##arg)
#define nmprintk0(x)           printk("(nm:%d) " x, current->pid)
#if 0
/* disabled: no-op variants to silence nm debug output */
#define nmprintk(x, arg...)
#define nmprintk0(x)
#endif

/* hash of nodes by primary IPv4 address, one page of list heads */
static struct list_head *nm_ip_hash = NULL;
static spinlock_t nm_ip_hash_lock;

static int nm_init_ip_hash(void);
static void nm_destroy_ip_hash(void);
-
-
-static void nm_destroy_ip_hash(void)
-{
- int i;
- if (!nm_ip_hash)
- return;
- for (i=0; i<NM_HASH_SIZE; i++) {
- /* TODO: cleanup */
- }
- free_page((unsigned long)nm_ip_hash);
-}
-
-static int nm_init_ip_hash(void)
-{
- int i;
-
- if ((PAGE_SIZE / sizeof(struct list_head)) < NM_HASH_SIZE) {
- nmprintk("eek! hash size too big for this arch!\n");
- BUG();
- }
-
- nm_ip_hash = (struct list_head *) __get_free_page(GFP_KERNEL);
- if (!nm_ip_hash)
- return -ENOMEM;
- for (i=0; i<NM_HASH_SIZE; i++)
- INIT_LIST_HEAD(&nm_ip_hash[i]);
- spin_lock_init(&nm_ip_hash_lock);
- return 0;
-}
-
-
-
-
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request)
-{
- int start = 0, slot_num;
- if (request != NM_INVALID_SLOT_NUM)
- start = request;
- slot_num = find_next_zero_bit (bitmap, max, start);
- if (slot_num >= max)
- return -1;
- if (request != NM_INVALID_SLOT_NUM && slot_num != request)
- return -1;
- set_bit(slot_num, bitmap);
- return slot_num;
-}
-
-
-
-
/*
 * Create a named child (file or directory) under @parent in the nm
 * filesystem with the fixed inode number @ino.  Returns the new dentry
 * or ERR_PTR(-EINVAL) on any failure (no finer-grained error codes).
 */
static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent,
				   struct tree_descr *file, int ino)
{
	struct qstr name;
	struct dentry *dentry = ERR_PTR(-EINVAL);
	struct inode *inode;

	if (!file->name)
		goto out;
	/* build the qstr by hand so we can d_alloc directly under parent */
	name.name = file->name;
	name.len = strlen(name.name);
	nmprintk("adding file %*s\n", name.len, name.name);
	name.hash = full_name_hash(name.name, name.len);
	dentry = d_alloc(parent, &name);
	if (!dentry) {
		dentry = ERR_PTR(-EINVAL);
		goto out;
	}
	inode = new_inode(s);
	if (!inode) {
		dput(dentry);
		dentry = ERR_PTR(-EINVAL);
		goto out;
	}
	inode->i_mode = file->mode;
	inode->i_uid = inode->i_gid = 0;
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_blocks = 0;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (file->mode & S_IFDIR) {
		/* directories get the libfs simple dir ops */
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
	} else {
		inode->i_fop = file->ops;
	}
	/* fixed ino: node/group number maps directly to inode number */
	inode->i_ino = ino;
	insert_inode_hash(inode);
	d_add(dentry, inode);

out:
	return dentry;
}
-
-
/*
 * Add a hard link named file->name under @parent to the existing inode
 * @ino (used to link a node into a group directory by slot number).
 * Bumps i_nlink on success.  Returns the new dentry or ERR_PTR(-EINVAL).
 */
static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent,
				   struct tree_descr *file, int ino)
{
	struct qstr name;
	struct dentry *dentry = ERR_PTR(-EINVAL);
	struct inode *inode;

	if (!file->name)
		goto out;
	name.name = file->name;
	name.len = strlen(name.name);
	nmprintk("adding link %*s\n", name.len, name.name);
	name.hash = full_name_hash(name.name, name.len);
	dentry = d_alloc(parent, &name);
	if (!dentry) {
		nmprintk("failed to d_alloc\n");
		dentry = ERR_PTR(-EINVAL);
		goto out;
	}
	/* the target inode must already exist in this sb */
	inode = iget(s, ino);
	if (!inode) {
		nmprintk("failed to iget\n");
		dput(dentry);
		dentry = ERR_PTR(-EINVAL);
		goto out;
	}
	/* only link inodes that carry nm private data */
	if (!inode->u.generic_ip) {
		nmprintk("bad inode: %d\n", ino);
		iput(inode);
		dput(dentry);
		dentry = ERR_PTR(-EINVAL);
		goto out;
	}
	inode->i_nlink++;
	/* d_add consumes the iget reference */
	d_add(dentry, inode);

out:
	return dentry;
}
-
-
-
-
-
-/* cluster, node and group transaction files.
- * here's where the actual work of nm takes place. */
-
-static int nm_create_cluster(char *buf)
-{
- int ret = -EINVAL;
-
- nmprintk("create cluster...\n");
-
- spin_lock(&nm_lock);
- if (cluster.state == NM_CLUSTER_UP) {
- ret = sprintf(buf, "%d: cluster already up\n", -EINVAL);
- } else {
- cluster.state = NM_CLUSTER_UP;
- ret = sprintf(buf, "0: cluster state: UP");
- }
- spin_unlock(&nm_lock);
- return ret;
-}
-
-
-
-int nm_create_group(char *buf, nm_op *data)
-{
- struct tree_descr desc;
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
- int ino, group_num;
- int ret = -EINVAL;
- nm_group_inode_private *g = NULL;
-
- nmprintk("create group...\n");
-
- data->arg_u.gc.name[NM_MAX_NAME_LEN] = '\0';
- inode = nm_get_group_by_name(data->arg_u.gc.name);
- if (inode) {
- ret = sprintf(buf, "%d: group %u (%s) already exists", -EEXIST,
- nm_get_group_global_index(inode),
- data->arg_u.gc.name);
- iput(inode);
- return ret;
- }
-
- group_num = data->arg_u.gc.group_num;
- if (group_num > NM_INVALID_SLOT_NUM)
- goto leave;
-
- spin_lock(&cluster.bitmap_lock);
- group_num = nm_find_next_slot(&(cluster.group_bitmap[0]), 255,
- group_num);
- spin_unlock(&cluster.bitmap_lock);
-
- if (group_num < 0) {
- nmprintk("out of group slots!\n");
- goto leave;
- }
-
- ino = group_num + NM_GROUP_INODE_START;
-
- desc.name = data->arg_u.gc.name;
- desc.ops = NULL;
- desc.mode = S_IFDIR | 0755;
- dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
- if (IS_ERR(dentry))
- goto leave;
- inode = igrab(dentry->d_inode);
- if (!inode) {
- nmprintk("igrab failed!\n");
- goto leave;
- }
-
- g = kmalloc(sizeof(nm_group_inode_private), GFP_KERNEL);
- if (!g)
- goto leave;
-
- memset(g, 0, sizeof(nm_group_inode_private));
- memcpy(g->disk.uuid, data->arg_u.gc.disk_uuid, CLUSTER_DISK_UUID_LEN);
- spin_lock_init(&g->bitmap_lock);
- if (g->disk.uuid[0])
- g->state = NM_GROUP_NOT_READY;
- else
- g->state = NM_GROUP_READY;
- g->inode = inode;
- inode->u.generic_ip = g;
-
- ret = sprintf(buf, "0: group %u (%s) added, uuid: %s", group_num,
- data->arg_u.gc.name, g->disk.uuid);
- nm_do_callbacks(NM_GROUP_ADD_CB, inode, NULL, group_num);
-
-leave:
- if (ret < 0) {
- if (inode) {
- if (inode->u.generic_ip)
- kfree(inode->u.generic_ip);
- iput(inode);
- }
- if (dentry)
- dput(dentry);
- }
- return ret;
-}
-EXPORT_SYMBOL(nm_create_group);
-
-int nm_create_node(char *buf, nm_op *data)
-{
- struct tree_descr desc;
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
- int ino, node_num, bucket;
- int ret = -EINVAL;
- nm_node_inode_private *n = NULL;
- struct page *page = NULL;
-
- nmprintk("add cluster node ...\n");
-
- data->arg_u.node.node_name[NM_MAX_NAME_LEN] = '\0';
- inode = nm_get_node_by_name(data->arg_u.node.node_name);
- if (inode) {
- ret = sprintf(buf, "%d: node %u (%s) already exists", -EEXIST,
- nm_get_node_global_index(inode),
- data->arg_u.node.node_name);
- iput(inode);
- return ret;
- }
-
- node_num = data->arg_u.node.node_num;
- if (node_num > NM_INVALID_SLOT_NUM) {
- nmprintk("bad node_num: %d\n", node_num);
- goto leave;
- }
-
- spin_lock(&cluster.bitmap_lock);
- node_num = nm_find_next_slot(&(cluster.node_bitmap[0]), 255, node_num);
- spin_unlock(&cluster.bitmap_lock);
-
- if (node_num < 0) {
- nmprintk("out of node slots!\n");
- goto leave;
- }
-
- ino = node_num + NM_NODE_INODE_START;
-
- desc.name = data->arg_u.node.node_name;
- desc.ops = NULL;
- desc.mode = S_IFREG | S_IWUSR;
- dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
- if (IS_ERR(dentry)) {
- nmprintk("bad dentry\n");
- goto leave;
- }
- inode = igrab(dentry->d_inode);
- if (!inode) {
- nmprintk("igrab failed!\n");
- goto leave;
- }
-
- n = kmalloc(sizeof(nm_node_inode_private), GFP_KERNEL);
- if (!n) {
- nmprintk("could not kmalloc\n");
- goto leave;
- }
- memcpy(&n->node, &data->arg_u.node, sizeof(nm_node_info));
- INIT_LIST_HEAD(&n->ip_hash);
- n->net.sock = NULL;
- INIT_LIST_HEAD(&n->net.active_item);
- spin_lock_init(&n->net.sock_lock);
- n->net.flags = 0;
- n->net.page = NULL;
- n->net.page_off = 0;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL) {
- nmprintk("page allocation failed\n");
- goto leave;
- }
- n->net.page = page;
-
- /* hash on first ip address */
- spin_lock(&nm_ip_hash_lock);
- bucket = hash_long(n->node.ifaces[0].addr_u.ip_addr4, NM_HASH_BITS);
- list_add_tail(&n->ip_hash, &nm_ip_hash[bucket]);
- spin_unlock(&nm_ip_hash_lock);
- nmprintk("hashed ip %d.%d.%d.%d to bucket %d\n",
- NIPQUAD(n->node.ifaces[0].addr_u.ip_addr4), bucket);
- n->inode = inode;
- inode->u.generic_ip = n;
-
- ret = sprintf(buf, "0: node %u (%s) added", node_num,
- n->node.node_name);
- nm_do_callbacks(NM_NODE_ADD_CB, inode, NULL, node_num);
-
-leave:
- if (ret < 0) {
- if (page)
- __free_page(page);
- if (inode) {
- if (inode->u.generic_ip)
- kfree(inode->u.generic_ip);
- iput(inode);
- }
- if (dentry)
- dput(dentry);
- }
- return ret;
-}
-
/*
 * Mark @group as ready for membership changes (called once its disk
 * has been discovered).  Returns 0, or -EINVAL if the inode carries no
 * group private data.
 */
int nm_make_group_ready(struct inode *group)
{
	nm_group_inode_private *g = group->u.generic_ip;
	if (!g)
		return -EINVAL;
	g->state = NM_GROUP_READY;
	return 0;
}
EXPORT_SYMBOL(nm_make_group_ready);
-
-int nm_add_node_to_group(char *buf, nm_op *data)
-{
- struct tree_descr desc;
- struct inode *inode = NULL;
- struct dentry *dentry = NULL, *child = NULL;
- nm_group_inode_private *g = NULL;
- int group_num, slot_num;
- int ret = -EINVAL;
- u16 ino;
- char tmpname[6];
-
- nmprintk("add node to group...\n");
-
- group_num = data->arg_u.gc.group_num;
- ino = data->arg_u.gc.node_num;
- slot_num = data->arg_u.gc.slot_num;
-
- /* request a certain slot, or NM_INVALID_SLOT_NUM for any slot */
- if (slot_num > NM_INVALID_SLOT_NUM)
- goto leave;
-
- if (ino >= NM_INVALID_SLOT_NUM || group_num >= NM_INVALID_SLOT_NUM)
- goto leave;
-
- inode = __nm_get_group_by_num(group_num);
- if (!inode)
- goto leave;
- if (list_empty(&inode->i_dentry))
- goto leave;
- dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
- if (!dentry)
- goto leave;
- g = inode->u.generic_ip;
- if (!g)
- goto leave;
-
- if (g->state == NM_GROUP_NOT_READY) {
- ret = sprintf(buf, "%d: group disk has not been discovered. "
- "cannot add nodes.", -EROFS);
- goto leave;
- }
-
- spin_lock(&g->bitmap_lock);
- slot_num = nm_find_next_slot(&(g->slot_bitmap[0]), 255, slot_num);
- spin_unlock(&g->bitmap_lock);
- if (slot_num < 0)
- goto leave;
-
- /* create hardlink to ino with name "slot_num" */
- sprintf(tmpname, "%03u", slot_num);
- desc.name = &(tmpname[0]);
- desc.ops = NULL;
- desc.mode = 0;
- child = nm_add_link(single_sb, dentry, &desc,
- NM_NODE_INODE_START+ino);
- if (IS_ERR(child)) {
- nmprintk("error adding link for %s\n", tmpname);
- child = NULL;
- goto leave;
- }
-
- ret = sprintf(buf, "0: node %u added to group: %*s",
- ino, dentry->d_name.len, dentry->d_name.name);
-
- if (!igrab(child->d_inode))
- goto leave;
- nm_do_callbacks(NM_GROUP_NODE_ADD_CB, inode, child->d_inode, slot_num);
- iput(child->d_inode);
-
-leave:
- if (dentry)
- dput(dentry);
- if (child)
- dput(child);
- if (inode)
- iput(inode);
- return ret;
-}
-EXPORT_SYMBOL(nm_add_node_to_group);
-
/*
 * Remove @node from @group: delete the slot-named hardlink, run the
 * NM_GROUP_NODE_DEL_CB callbacks, then release the slot bit.
 * Returns 0 or -EINVAL.
 */
int nm_remove_node_from_group(struct inode *group, struct inode *node)
{
	struct dentry *child = NULL;
	nm_group_inode_private *g = NULL;
	int slot_num;
	int ret = -EINVAL;

	nmprintk("remove node from group...\n");

	/* find the node's slot and take a ref on its dentry */
	slot_num = nm_get_group_index(group, node, &child);

	if (slot_num == NM_MAX_NODES || !child)
		goto leave;

	g = group->u.generic_ip;
	if (!g)
		goto leave;

	nmprintk("killing the dentry now!!\n");
	/* drop the hardlink under the appropriate directory lock for
	 * this kernel generation (i_zombie on 2.4, i_sem on 2.6) */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	down(&group->i_zombie);
	node->i_nlink--;
	d_delete(child);
	up(&group->i_zombie);
#else
	down(&group->i_sem);
	node->i_nlink--;
	d_delete(child);
	up(&group->i_sem);
#endif
	nmprintk("done killing the dentry!!\n");


	/* hold the node inode across the callbacks */
	if (!igrab(node))
		goto leave;
	nm_do_callbacks(NM_GROUP_NODE_DEL_CB, group, node, slot_num);
	iput(node);

	/* slot is free again only after the callbacks have run */
	spin_lock(&g->bitmap_lock);
	clear_bit(slot_num, (void *)(&g->slot_bitmap[0]));
	spin_unlock(&g->bitmap_lock);

	ret = 0;

leave:
	if (child)
		dput(child);
	return ret;
}
-
-
-
-int nm_name_cluster(char *buf, nm_op *data)
-{
- int ret = -EINVAL;
-
- nmprintk("name cluster...\n");
- spin_lock(&nm_lock);
- if (cluster.state == NM_CLUSTER_UP) {
- ret = sprintf(buf, "%d: cluster name could not be set. "
- "cluster already up.", -EINVAL);
- goto leave;
- }
- memset(cluster.name, 0, NM_MAX_NAME_LEN+1);
- memcpy(cluster.name, data->arg_u.name, NM_MAX_NAME_LEN);
- ret = sprintf(buf, "0: cluster name set: %s", cluster.name);
-leave:
- spin_unlock(&nm_lock);
- return ret;
-}
-
-int nm_destroy_cluster(char *buf)
-{
- int ret;
- nmprintk("destroy cluster...\n");
-
- /* TODO */
- spin_lock(&nm_lock);
- nm_init_cluster(&cluster);
- ret = sprintf(buf, "0: rudely destroyed cluster!!!");
- spin_unlock(&nm_lock);
- return ret;
-}
-
-
-int nm_get_cluster_num_nodes(char *buf)
-{
- int num_nodes=0, i;
-
- nmprintk("get cluster num nodes...\n");
-
- spin_lock(&cluster.bitmap_lock);
- for (i=0; i<ARRAY_SIZE(cluster.node_bitmap); i++)
- num_nodes += nm_hweight(cluster.node_bitmap[i]);
- spin_unlock(&cluster.bitmap_lock);
-
- return sprintf(buf, "0: %d", num_nodes);
-}
-
-int nm_get_cluster_num_groups(char *buf)
-{
- int num_groups=0, i;
-
- nmprintk("get cluster num groups...\n");
-
- spin_lock(&cluster.bitmap_lock);
- for (i=0; i<ARRAY_SIZE(cluster.group_bitmap); i++)
- num_groups += nm_hweight(cluster.group_bitmap[i]);
- spin_unlock(&cluster.bitmap_lock);
-
- return sprintf(buf, "0: %d", num_groups);
-}
-
-int nm_get_group_num_nodes(struct inode *group)
-{
- int num_nodes=0, i;
- nm_group_inode_private *g;
-
- nmprintk("get group num nodes...\n");
-
- g = group->u.generic_ip;
- if (!g)
- return -EINVAL;
-
- spin_lock(&g->bitmap_lock);
- for (i=0; i<ARRAY_SIZE(g->slot_bitmap); i++)
- num_nodes += nm_hweight(g->slot_bitmap[i]);
- spin_unlock(&g->bitmap_lock);
-
- return num_nodes;
-}
-
/*
 * Iterator over a group's populated disk-heartbeat slots.  *idx is the
 * cursor: the next set bit at or after *idx is found, *idx is updated
 * to it, and the matching disk slot is returned.  NULL means no more
 * slots (or no group private data).  Callers bump *idx between calls.
 * NOTE(review): 255 here mirrors the fixed bitmap size used throughout
 * this file rather than NM_MAX_NODES -- presumably the same value.
 */
void * nm_iterate_group_disk_slots(struct inode *group, int *idx)
{
	nm_group_inode_private *priv;
	int next;

	if (*idx >= 255)
		return NULL;
	priv = group->u.generic_ip;
	if (!priv)
		return NULL;
	next = find_next_bit(priv->slot_bitmap, 255, *idx);
	if (next >= 255)
		return NULL;
	*idx = next;
	return util_rarray_idx_to_slot(&priv->disk.slots, next);
}
EXPORT_SYMBOL(nm_iterate_group_disk_slots);
-
/*
 * Transaction handler: format a node's global index, name and its
 * valid (v4/v6) interfaces into @buf.  Returns the total byte count
 * written, or -EINVAL if the node does not exist.
 */
int nm_get_node_info(char *buf, nm_op *data)
{
	int ret, tmpret, i;
	nm_node_inode_private *priv;
	nm_network_iface *n;
	struct inode *inode = NULL;
	struct dentry *dentry;
	u16 node_num;
	u16 vers;

	ret = -EINVAL;
	node_num = data->arg_u.index;
	inode = __nm_get_node_by_num(node_num);
	if (inode) {
		dentry = list_entry(inode->i_dentry.next, struct dentry,
				    d_alias);
		priv = inode->u.generic_ip;
		ret = sprintf(buf, "0: global_index=%u\n"
			      "name=%*s\n",
			      priv->node.node_num, dentry->d_name.len,
			      dentry->d_name.name);
		/* buf advances as each chunk is appended; ret keeps the
		 * running total */
		buf += ret;
		for (i=0; i<NM_MAX_IFACES; i++) {
			n = &priv->node.ifaces[i];
			vers = ntohs(n->ip_version);
			nmprintk("ip_version=%u, vers=%u\n",
				 n->ip_version, vers);
			/* skip unconfigured interface slots */
			if (vers!=4 && vers!=6)
				continue;
			/* TODO: how to print ipv6? */
			tmpret = sprintf(buf, "iface%d.port=%u\n"
					 "iface%d.version=%d\n"
					 "iface%d.addr=%d.%d.%d.%d\n",
					 i, ntohs(n->ip_port), i, vers, i,
					 NIPQUAD(n->addr_u.ip_addr4));
			buf += tmpret;
			ret += tmpret;
		}
		iput(inode);
	}
	return ret;
}
-
/*
 * Transaction handler: format a group's number, name, disk uuid and
 * the names of its member links into @buf.  Returns the byte count,
 * or -EINVAL if the group does not exist.
 */
int nm_get_group_info(char *buf, nm_op *data)
{
	int ret, tmpret;
	nm_group_inode_private *g = NULL;
	struct inode *inode = NULL;
	u16 group_num;
	struct dentry *dentry, *child;

	ret = -EINVAL;
	group_num = data->arg_u.index;
	inode = __nm_get_group_by_num(group_num);
	if (inode) {
		g = inode->u.generic_ip;
		dentry = list_entry(inode->i_dentry.next, struct dentry,
				    d_alias);
		ret = sprintf(buf, "0: group_num=%u\n"
			      "name=%*s\n"
			      "disk_uuid=%s\n",
			      group_num, dentry->d_name.len,
			      dentry->d_name.name, g->disk.uuid);
		buf += ret;

		/* walk the group dir's children under dcache_lock; the
		 * slot-named links are the members */
		spin_lock(&dcache_lock);
		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
			tmpret = sprintf(buf, "%*s\n", child->d_name.len,
					 child->d_name.name);
			buf += tmpret;
			ret += tmpret;
		}
		spin_unlock(&dcache_lock);
		iput(inode);
	}
	return ret;
}
-
-
-
-static ssize_t write_cluster(struct file *file, char *buf, size_t size)
-{
- nm_op *data;
- int ret;
- u16 me;
-
- nmprintk("write_cluster\n");
-
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
- return -EINVAL;
-
- switch (data->opcode) {
- case NM_OP_CREATE_CLUSTER:
- ret = nm_create_cluster(buf);
- break;
- case NM_OP_CREATE_GROUP:
- ret = nm_create_group(buf, data);
- break;
- case NM_OP_NAME_CLUSTER:
- ret = nm_name_cluster(buf, data);
- break;
- case NM_OP_DESTROY_CLUSTER:
- ret = nm_destroy_cluster(buf);
- break;
- case NM_OP_ADD_CLUSTER_NODE:
- ret = nm_create_node(buf, data);
- break;
- case NM_OP_GET_CLUSTER_NUM_NODES:
- ret = nm_get_cluster_num_nodes(buf);
- break;
- case NM_OP_GET_GLOBAL_NODE_NUM:
- ret = 0;
- me = nm_this_node(NULL);
- if (me >= NM_MAX_NODES)
- ret = -EINVAL;
- ret = sprintf(buf, "%d: %u", ret, me);
- break;
- default:
- ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL,
- data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
-}
-
-static ssize_t write_node(struct file *file, char *buf, size_t size)
-{
- nm_op *data;
- int ret;
-
- nmprintk("write_node\n");
-
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
- return -EINVAL;
-
- switch (data->opcode) {
- case NM_OP_GET_NODE_INFO:
- ret = nm_get_node_info(buf, data);
- break;
- default:
- ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL,
- data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
-}
-
-static ssize_t write_group(struct file *file, char *buf, size_t size)
-{
- nm_op *data;
- int ret;
-
- nmprintk("write_group\n");
-
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
- return -EINVAL;
-
- nmprintk("opcode is %u, add_group is %u\n", data->opcode,
- NM_OP_ADD_GROUP_NODE);
- switch (data->opcode) {
- case NM_OP_GET_GROUP_INFO:
- ret = nm_get_group_info(buf, data);
- break;
-
- case NM_OP_ADD_GROUP_NODE:
- ret = nm_add_node_to_group(buf, data);
- break;
-
- default:
- ret = sprintf(buf, "%d: bad opcode: %u",
- -EINVAL, data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
-}
-
-
-
-static struct inode * __nm_get_group_by_num(u16 group_num)
-{
- struct inode *inode = iget(single_sb, group_num + NM_GROUP_INODE_START);
- if (!inode)
- return NULL;
- if (!inode->u.generic_ip) {
- iput(inode);
- return NULL;
- }
- return inode;
-}
-
-static struct inode * __nm_get_node_by_num(u16 node_num)
-{
- struct inode *inode = iget(single_sb, node_num + NM_NODE_INODE_START);
- if (!inode)
- return NULL;
- if (!inode->u.generic_ip) {
- iput(inode);
- return NULL;
- }
- return inode;
-}
-
-/* ipv4 only for now... */
-struct inode * nm_get_node_by_ip(u32 addr)
-{
- int bucket;
- struct list_head *iter;
- nm_node_inode_private *priv;
- struct inode *ret = NULL;
-
- bucket = hash_long(addr, NM_HASH_BITS);
-
- spin_lock(&nm_ip_hash_lock);
- list_for_each(iter, &nm_ip_hash[bucket]) {
- priv = list_entry(iter, nm_node_inode_private, ip_hash);
- if (priv->node.ifaces[0].addr_u.ip_addr4 == addr) {
- ret = igrab(priv->inode);
- break;
- }
-
- }
- spin_unlock(&nm_ip_hash_lock);
- return ret;
-}
-EXPORT_SYMBOL(nm_get_node_by_ip);
-
-struct inode * nm_get_group_by_num(u16 group_num)
-{
- struct inode *inode;
- spin_lock(&nm_lock);
- inode = __nm_get_group_by_num(group_num);
- spin_unlock(&nm_lock);
- return inode;
-}
-EXPORT_SYMBOL(nm_get_group_by_num);
-
/* Accessor for the single global cluster descriptor. */
nm_cluster * nm_get_cluster(void)
{
	return &cluster;
}
-
-struct inode * nm_get_node_by_num(u16 node_num)
-{
- struct inode *inode;
- spin_lock(&nm_lock);
- inode = __nm_get_node_by_num(node_num);
- spin_unlock(&nm_lock);
- return inode;
-}
-EXPORT_SYMBOL(nm_get_node_by_num);
-
-struct inode * nm_get_group_node_by_index(struct inode *group, u16 index)
-{
- struct dentry *dentry = NULL, *parent;
- struct inode *inode = NULL;
- char tmpname[6];
-
- if (list_empty(&group->i_dentry))
- return NULL;
- parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
- if (!parent)
- return NULL;
-
- sprintf(tmpname, "%03u", index);
- dentry = lookup_one_len(tmpname, parent, strlen(tmpname));
- if (!IS_ERR(dentry)) {
- inode = dentry->d_inode;
- if (inode) {
- inode = igrab(inode);
- if (!inode->u.generic_ip || !S_ISREG (inode->i_mode)) {
- nmprintk("bad inode!\n");
- iput(inode);
- inode = NULL;
- }
- }
- if (!inode)
- dput(dentry);
- }
- dput(parent);
- return inode;
-}
-EXPORT_SYMBOL(nm_get_group_node_by_index);
-
-struct inode * __nm_get_node_by_name(char *node_name, int dir)
-{
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
-
- NM_ASSERT(node_name);
- NM_ASSERT(single_sb);
- NM_ASSERT(single_sb->s_root);
-
- dentry = lookup_one_len(node_name, single_sb->s_root,
- strlen(node_name));
- if (!IS_ERR(dentry)) {
- inode = dentry->d_inode;
- if (inode) {
- inode = igrab(inode);
- if (!inode->u.generic_ip ||
- (dir && !S_ISDIR (inode->i_mode)) ||
- (!dir && !S_ISREG (inode->i_mode))) {
- nmprintk("bad inode!\n");
- iput(inode);
- inode = NULL;
- }
- }
- }
- return inode;
-}
-EXPORT_SYMBOL(__nm_get_node_by_name);
-
-
-/*
- * if group is NULL: return the global index for this node
- * if group is non NULL: return the index within the group of this node
- *
- * NOTE: currently getting the group index is slow
- * will need to change this somehow
- */
/*
 * if group is NULL: return the global index for this node
 * if group is non NULL: return the index within the group of this node
 *
 * NOTE: currently getting the group index is slow
 * will need to change this somehow
 *
 * Returns NM_MAX_NODES when this node cannot be found.
 */
u16 nm_this_node(struct inode *group)
{
	struct inode *inode = NULL;
	struct dentry *child = NULL;
	u16 node_num = NM_MAX_NODES;

	/* our own node entry is named after utsname (nm_nodename) */
	inode = nm_get_node_by_name(nm_nodename);
	if (inode && inode->u.generic_ip) {
		if (group)
			node_num = nm_get_group_index(group, inode, &child);
		else
			node_num = nm_get_node_global_index(inode);

	}
	/* iput/dput tolerate NULL */
	iput(inode);
	dput(child);
	//nmprintk("for group=%p, this node is %u\n", group, node_num);
	return node_num;
}
EXPORT_SYMBOL(nm_this_node);
-
-/* slow */
-static u16 nm_get_group_index(struct inode *group, struct inode *inode,
- struct dentry **child)
-{
- struct dentry *tmp = NULL, *parent = NULL;
- u16 slot_num = NM_MAX_NODES;
- struct list_head *iter;
- char tmpname[6];
- char *err;
-
- *child = NULL;
- parent = NULL;
- if (list_empty(&group->i_dentry))
- goto leave;
- parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
- if (!parent)
- goto leave;
-
- spin_lock(&dcache_lock);
- list_for_each(iter, &parent->d_subdirs) {
- tmp = list_entry(iter, struct dentry, d_child);
- if (tmp->d_inode == inode)
- break;
- tmp = NULL;
- }
- if (tmp)
- dget_locked(tmp);
- spin_unlock(&dcache_lock);
-
- if (!tmp || tmp->d_name.len > 3)
- goto leave;
- strncpy(tmpname, tmp->d_name.name, tmp->d_name.len);
- tmpname[tmp->d_name.len] = '\0';
- err=NULL;
- slot_num = simple_strtoul(tmpname, &err, 10);
-
- if (*err != '\0')
- slot_num = NM_MAX_NODES; // error
- else
- *child = dget(tmp); // done, get extra ref for child
-
-leave:
- dput(parent);
- dput(tmp);
-
- return slot_num;
-}
-
/* Per-dlm-context nm initialization: currently a no-op stub. */
int nm_init(dlm_ctxt *dlm)
{
	return 0;
}
-
-int nm_register_callback(int type, void (*func)(void *, void *, u16))
-{
- nm_callback_func *f;
-
- if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
- return -EINVAL;
- f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
- if (f == NULL)
- return -ENOMEM;
- memset(f, 0, sizeof(nm_callback_func));
- f->func = func;
- spin_lock(&nm_cb_lock);
- list_add_tail(&f->list, &nm_callbacks[type]);
- spin_unlock(&nm_cb_lock);
- return 0;
-}
-EXPORT_SYMBOL(nm_register_callback);
-
-#warning need to change nm callbacks to be like hb callbacks... no locks when calling.
-int nm_unregister_callback(int type, void (*func)(void *, void *, u16))
-{
- struct list_head *iter, *tmpiter;
- int ret = -EINVAL;
- nm_callback_func *f;
-
- if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
- return ret;
-
- spin_lock(&nm_cb_lock);
- list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
- f = list_entry (iter, nm_callback_func, list);
- if (f->func == func) {
- list_del(&f->list);
- kfree(f);
- ret = 0;
- break;
- }
- }
- spin_unlock(&nm_cb_lock);
- return ret;
-}
-EXPORT_SYMBOL(nm_unregister_callback);
-
/*
 * Invoke every callback registered for event @type.  Runs with
 * nm_cb_lock held, so callbacks must not sleep or re-register -- see
 * the #warning above about converting this to hb-style lockless
 * callbacks.
 */
static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u16 idx)
{
	struct list_head *iter;
	nm_callback_func *f;

	spin_lock(&nm_cb_lock);
	list_for_each(iter, &nm_callbacks[type]) {
		f = list_entry (iter, nm_callback_func, list);
		(f->func) (ptr1, ptr2, idx);
	}
	spin_unlock(&nm_cb_lock);
}
-
-
/* Remove the /proc/cluster/nm mountpoint dirs (child before parent). */
static void nm_teardown(void)
{
	remove_proc_entry("cluster/nm", NULL);
	remove_proc_entry("cluster", NULL);
}
-
/*
 * Reset @cluster to its pristine DOWN state and (re)initialize the
 * callback lists.  Called at module load and from nm_destroy_cluster.
 * NOTE(review): re-running INIT_LIST_HEAD on nm_callbacks from the
 * destroy path appears to orphan any still-registered callbacks --
 * confirm whether destroy can race with registered users.
 */
static void nm_init_cluster(nm_cluster *cluster)
{
	int i;
	memset(cluster, 0, sizeof(nm_cluster));
	cluster->state = NM_CLUSTER_DOWN;
	spin_lock_init(&cluster->bitmap_lock);

	for (i=NM_NODE_ADD_CB; i<=NM_GROUP_NODE_DEL_CB; i++)
		INIT_LIST_HEAD(&nm_callbacks[i]);
}
-
-
-
-
-
-/*----------------------------------------------------------------------------*/
-/*
- * populating the filesystem.
- */
/*
 * populating the filesystem: build the nm fs tree (the three
 * transaction files) and attach the write-op dispatch table to the
 * superblock.  Records the sb in the global single_sb on success.
 */
static int nm_fill_super(struct super_block * sb, void * data, int silent)
{
	int ret, sz;
	TA_write_ops *ops;
	/* fixed inode layout; indices match the NM_* enum in the header */
	static struct tree_descr nm_files[] = {
		[NM_Cluster] = {".cluster", &transaction_ops, S_IWUSR},
		[NM_Node] = {".node", &transaction_ops, S_IWUSR},
		[NM_Group] = {".group", &transaction_ops, S_IWUSR},
		/* last one */ {""}
	};

	/* dispatch table sized to the file array, one write op per file */
	sz = sizeof(nm_files) / sizeof(struct tree_descr);
	ops = kmalloc(sizeof(TA_write_ops) + (sz * sizeof(TA_write_op *)),
		      GFP_KERNEL);
	if (!ops)
		return -ENOMEM;

	memset(ops, 0, sizeof(TA_write_ops) + (sz * sizeof(TA_write_op *)));
	ops->num_ops = sz;
	ops->write_op[NM_Cluster] = write_cluster;
	ops->write_op[NM_Node] = write_node;
	ops->write_op[NM_Group] = write_group;

	single_sb = NULL;
	nmprintk("calling simple_fill_super...\n");
	/* 0x98675309 is the nm fs magic number */
	ret = simple_fill_super(sb, 0x98675309, nm_files);
	if (ret >= 0) {
		TA_GENERIC_SB_MEMBER(sb) = ops;
		single_sb = sb;
	} else {
		kfree(ops);
	}
	return ret;
}
-
-/* blindly copied from ocfs2 */
/* blindly copied from ocfs2 */
/* filesystem registration, split by kernel generation: 2.6 uses
 * get_sb_single + file_system_type, 2.4 uses DECLARE_FSTYPE with
 * FS_SINGLE|FS_LITTER for the same single-instance behavior */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static struct super_block *nm_get_sb(struct file_system_type *fs_type,
				     int flags,
				     const char *dev_name,
				     void *data)
{
	return get_sb_single(fs_type, flags, data, nm_fill_super);
}

static struct file_system_type nm_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "nm",
	.get_sb		= nm_get_sb,
	.kill_sb	= kill_anon_super,
};
#else
static struct super_block *nm_read_super(struct super_block *sb,
					 void *data,
					 int silent)
{
	nmprintk("welcome to nm_read_super!!!\n");
	/* 2.4 read_super reports failure by returning NULL */
	return (nm_fill_super(sb, data, silent) < 0) ? NULL : sb;
}

static DECLARE_FSTYPE (nm_fs_type, "nm", nm_read_super, FS_SINGLE|FS_LITTER);
#endif
-
-static int __init init_nm(void)
-{
- int retval;
- nm_nodename = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
- if (nm_nodename==NULL) {
- nmprintk("could not allocate a few bytes for nodename!\n");
- return -ENOMEM;
- }
- strcpy(nm_nodename, system_utsname.nodename);
- nmprintk("loading nm module: nodename is %s\n", nm_nodename);
-
- if (nm_init_ip_hash() < 0) {
- nmprintk("failed to allocate node IP hash\n");
- return -ENOMEM;
- }
-
- nm_init_cluster(&cluster);
-
- if (proc_mkdir("cluster", 0)) {
- if (proc_mkdir("cluster/nm", 0)) {
- }
- }
- nmprintk("calling register_filesystem\n");
- retval = register_filesystem(&nm_fs_type);
- nmprintk("done calling register_filesystem: ret=%d\n", retval);
- if (retval)
- nm_teardown();
- return retval;
-}
-
/* Module exit: tear down proc entries, the fs, the IP hash and the
 * nodename, in reverse order of init. */
static void __exit exit_nm(void)
{
	nm_teardown();
	unregister_filesystem(&nm_fs_type);
	nm_destroy_ip_hash();
	kfree(nm_nodename);
	nmprintk("unloading nm module\n");
}
-
-
-
-
-MODULE_LICENSE("GPL");
-module_init(init_nm)
-module_exit(exit_nm)
Copied: trunk/fs/ocfs2/cluster/nodemanager.c (from rev 1811, trunk/cluster/nodemanager.c)
Deleted: trunk/fs/ocfs2/cluster/nodemanager.h
===================================================================
--- trunk/cluster/nodemanager.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/nodemanager.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,197 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * nodemanager.h
- *
- * Function prototypes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_NODEMANAGER_H
-#define CLUSTER_NODEMANAGER_H
-
-#define NM_ASSERT(x) ({ if (!(x)) { printk("nm: assert failed! %s:%d\n", __FILE__, __LINE__); BUG(); } })
-
-#include "ocfs2_nodemanager.h"
-
-
-#if BITS_PER_LONG == 32
-# define nm_hweight hweight32
-#elif BITS_PER_LONG == 64
-# define nm_hweight hweight64
-#else
-# error BITS_PER_LONG is something awful
-#endif
-
-
-
-struct _nm_ctxt
-{
- int dummy;
-};
-
-enum {
- NM_CLUSTER_DOWN=0,
- NM_CLUSTER_UP
-};
-
-enum {
- NM_GROUP_NOT_READY=0,
- NM_GROUP_READY
-};
-
-enum {
- NM_Root = 1,
- NM_Cluster,
- NM_Node,
- NM_Group,
-};
-
-typedef struct _nm_cluster
-{
- char name[NM_MAX_NAME_LEN+1];
- int state;
- spinlock_t bitmap_lock;
- unsigned long group_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_cluster;
-
-
-typedef struct _nm_group_inode_private
-{
- struct inode *inode;
- struct list_head net_list;
- struct list_head disk_list;
- cluster_disk disk;
- int state;
- spinlock_t bitmap_lock;
- unsigned long slot_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_group_inode_private;
-
-/* TODO: move this */
-struct sock;
-#define NET_FLAG_CREATING_SOCKET 0x00000001
-typedef struct _net_inode_private
-{
- struct socket *sock;
- spinlock_t sock_lock;
- struct list_head handlers;
- struct list_head active_item;
- int flags;
- struct page *page;
- size_t page_off;
-
- void (*orig_data_ready)(struct sock *sk, int bytes);
- void (*orig_error_report)(struct sock *sk);
-} net_inode_private;
-
-typedef struct _nm_node_inode_private
-{
- struct inode *inode;
- nm_node_info node;
- struct list_head ip_hash;
- net_inode_private net;
-} nm_node_inode_private;
-
-
-/* callback stuff */
-
-enum {
- NM_NODE_ADD_CB = 0,
- NM_NODE_DEL_CB,
- NM_GROUP_ADD_CB,
- NM_GROUP_DEL_CB,
- NM_GROUP_NODE_ADD_CB,
- NM_GROUP_NODE_DEL_CB,
- NM_NUM_CB
-};
-
-typedef void (nm_cb_func)(void *, void *, u16);
-
-typedef struct _nm_callback_func
-{
- struct list_head list;
- nm_cb_func *func;
- //void (*func)(void *, void *, u16);
-} nm_callback_func;
-
-
-
-
-u16 nm_this_node(struct inode *group);
-int nm_init(struct _dlm_ctxt *dlm);
-nm_cluster * nm_get_cluster(void);
-int nm_register_callback(int type, void (*func)(void *, void *, u16));
-int nm_unregister_callback(int type, void (*func)(void *, void *, u16));
-int nm_get_group_num_nodes(struct inode *group);
-int nm_make_group_ready(struct inode *group);
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
-int nm_remove_node_from_group(struct inode *group, struct inode *node);
-int nm_create_group(char *buf, nm_op *data);
-int nm_add_node_to_group(char *buf, nm_op *data);
-
-extern char *nm_nodename;
-
-
-struct inode * nm_get_group_by_num(u16 group_num);
-struct inode * nm_get_node_by_num(u16 node_num);
-struct inode * __nm_get_node_by_name(char *node_name, int dir);
-struct inode * nm_get_node_by_ip(u32 addr);
-struct inode * nm_get_group_node_by_index(struct inode *group, u16 index);
-
-static inline struct inode * nm_get_node_by_name(char *node_name)
-{
- return __nm_get_node_by_name(node_name, 0);
-}
-static inline struct inode * nm_get_group_by_name(char *group_name)
-{
- return __nm_get_node_by_name(group_name, 1);
-}
-
-
-static inline int nm_get_node_global_index(struct inode *node)
-{
- return (node->i_ino - NM_NODE_INODE_START);
-}
-static inline int nm_get_group_global_index(struct inode *group)
-{
- return (group->i_ino - NM_GROUP_INODE_START);
-}
-
-static inline int nm_valid_ino(int ino)
-{
-#if 0
- // these should never be referred to in kernel
- if (ino >= NM_Cluster && ino <= NM_Group)
- return 1;
-#endif
- if (ino >= NM_NODE_INODE_START &&
- ino < NM_NODE_INODE_START + NM_MAX_NODES)
- return 1;
- if (ino >= NM_GROUP_INODE_START &&
- ino < NM_GROUP_INODE_START + NM_MAX_NODES)
- return 1;
- return 0;
-}
-
-
-
-#endif /* CLUSTER_NODEMANAGER_H */
Copied: trunk/fs/ocfs2/cluster/nodemanager.h (from rev 1811, trunk/cluster/nodemanager.h)
Deleted: trunk/fs/ocfs2/cluster/ocfs2_heartbeat.h
===================================================================
--- trunk/cluster/ocfs2_heartbeat.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/ocfs2_heartbeat.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,57 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_heartbeat.h
- *
- * Describes the interface between userspace and the kernel for the
- * ocfs2_heartbeat module.
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef _OCFS2_HEARTBEAT_H
-#define _OCFS2_HEARTBEAT_H
-
-#define CLUSTER_DISK_UUID_LEN 32 // 16 byte binary == 32 char hex string
-
-
-#define HB_OP_MAGIC 0xf00d
-enum {
- HB_OP_START_DISK_HEARTBEAT=371,
- HB_OP_GET_NODE_MAP
-};
-
-typedef struct _hb_op
-{
- __u16 magic;
- __u16 opcode;
- __u32 fd;
- char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
- char pad1[15]; /* Pad to the __u16 following it */
- __u16 group_num;
- __u32 bits;
- __u32 blocks;
- __u64 start;
-} hb_op;
-
-typedef struct _hb_disk_heartbeat_block
-{
- __u64 time;
-} hb_disk_heartbeat_block;
-
-#endif /* _OCFS2_HEARTBEAT_H */
Copied: trunk/fs/ocfs2/cluster/ocfs2_heartbeat.h (from rev 1811, trunk/cluster/ocfs2_heartbeat.h)
Deleted: trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h
===================================================================
--- trunk/cluster/ocfs2_nodemanager.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,106 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_nodemanager.h
- *
- * Header describing the interface between userspace and the kernel
- * for the ocfs2_nodemanager module.
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
- * Manish Singh, Neeraj Goyal, Suchit Kaura
- */
-
-#ifndef _OCFS2_NODEMANAGER_H
-#define _OCFS2_NODEMANAGER_H
-
-#include "ocfs2_heartbeat.h"
-
-#define NM_MAX_IFACES 2
-#define NM_MAX_NODES 255
-#define NM_INVALID_SLOT_NUM 255
-
-/* host name, group name, cluster name all 64 bytes */
-#define NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
-
-
-#define NM_GROUP_INODE_START 200000
-#define NM_NODE_INODE_START 100000
-
-
-
-typedef struct _nm_network_iface
-{
- __u16 ip_port; /* for simplicity, just define exactly one port for this if */
- __u16 ip_version;
- union {
- __u32 ip_addr4; /* IPv4 address in NBO */
- __u32 ip_addr6[4]; /* IPv6 address in NBO */
- } addr_u;
-} nm_network_iface;
-
-typedef struct _nm_node_info
-{
- __u16 node_num;
- __u16 pad1;
- __u32 pad2;
- char node_name[NM_MAX_NAME_LEN+1];
- char pad3[63];
- nm_network_iface ifaces[NM_MAX_IFACES];
-} nm_node_info;
-
-/* transaction file nm_op stuff */
-
-#define NM_OP_MAGIC 0xbeaf
-enum {
- NM_OP_CREATE_CLUSTER=123,
- NM_OP_DESTROY_CLUSTER,
- NM_OP_NAME_CLUSTER,
- NM_OP_ADD_CLUSTER_NODE,
- NM_OP_GET_CLUSTER_NUM_NODES,
- NM_OP_GET_NODE_INFO,
- NM_OP_CREATE_GROUP,
- NM_OP_GET_GROUP_INFO,
- NM_OP_ADD_GROUP_NODE,
- NM_OP_GET_GLOBAL_NODE_NUM
-};
-
-typedef struct _nm_group_change
-{
- __u16 group_num;
- __u16 node_num;
- __u16 slot_num;
- char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
- char name[NM_MAX_NAME_LEN+1];
-} nm_group_change;
-
-typedef struct _nm_op
-{
- __u16 magic;
- __u16 opcode;
- __u32 pad1;
- union {
- __u16 index;
- char name[NM_MAX_NAME_LEN+1];
- nm_node_info node;
- nm_group_change gc;
- } arg_u;
-} nm_op;
-
-#endif /* _OCFS2_NODEMANAGER_H */
Copied: trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h (from rev 1811, trunk/cluster/ocfs2_nodemanager.h)
Deleted: trunk/fs/ocfs2/cluster/ocfs2_tcp.h
===================================================================
--- trunk/cluster/ocfs2_tcp.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/ocfs2_tcp.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,46 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_tcp.h
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef _OCFS2_TCP_H
-#define _OCFS2_TCP_H
-
-typedef struct _gsd_ioc
-{
- int fd;
- int namelen;
- char name[NM_MAX_NAME_LEN+1];
- int status;
-} gsd_ioc;
-
-typedef struct _net_ioc
-{
- __u32 status;
-} net_ioc;
-
-#define NET_IOC_MAGIC 'O'
-#define NET_IOC_ACTIVATE _IOR(NET_IOC_MAGIC, 1, net_ioc)
-#define NET_IOC_GETSTATE _IOR(NET_IOC_MAGIC, 2, net_ioc)
-#define GSD_IOC_CREATE_GROUP _IOR(NET_IOC_MAGIC, 3, gsd_ioc)
-#define GSD_IOC_ADD_GROUP_NODE _IOR(NET_IOC_MAGIC, 4, gsd_ioc)
-
-#endif /* _OCFS2_TCP_H */
Copied: trunk/fs/ocfs2/cluster/ocfs2_tcp.h (from rev 1811, trunk/cluster/ocfs2_tcp.h)
Deleted: trunk/fs/ocfs2/cluster/tcp.c
===================================================================
--- trunk/cluster/tcp.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/tcp.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,1630 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * ----
- *
- * Callers for this were originally written against a very simple
- * synchronus API. This implementation reflects those simple callers. Some
- * day I'm sure we'll need to move to a more robust posting/callback
- * mechanism.
- *
- * Transmit calls pass in kernel virtual addresses and block copying this into
- * the socket's tx buffers via a usual blocking sendmsg. They'll block
- * waiting for a failed socket to timeout. TX callers can also pass in
- * a poniter to an 'int' which gets filled with an errno off the wire
- * in response to the message they send.
- *
- * Handlers for unsolicited messages are registered. Each socket has
- * a page that incoming data is copied into. First the header, then
- * the data. Handlers are called from only one thread with a reference
- * to this per-socket page. This page is destroyed after the handler
- * call, so it can't be referenced beyond the call. Handlers may block
- * but are discouraged from doing so.
- *
- * Any framing errors (bad magic, unknown message types, large payload
- * lengths) closes a connection.
- *
- * One can imagine the direction a more sophisticated API would head in:
- * (there are certainly a half dozen examples in the kernel)
- * * tx
- * - passes in page/off/len to send, gets put on a queue
- * - if response data is needed, passes in preallocated page/off/len
- * - tx header includes message id to associate reply with posted rx buf
- * - write_space triggers passing the p/o/l tx queue to ->sendpage()
- * * rx
- * - data_ready uses tcp_read_sock to parse message header
- * - header identifies whether to copy into posted rx buf or unsolicited
- * - handers must be callable from bh context
- * but it really depends on what the semantics and messages are.
- *
- * XXX we should resolve these before release
- * - disable preemt before calling rx handler when debugging
- * - find explicit stack call to drain rx queue
- * - goto out style exiting
- * - get sin/iov/msg off the stack, per sock structures
- * - add trivial version trading message at the start of a conn
- * - go nuts adding static
- * - properly life-cycle management is waiting on a more functional
- * setup and teardown facility:
- * - implement net_remove_handlers
- * - refcounting around sock against tx/teardown/etc
- * - make sure ->net.page gets torn down with net_inode_private
- * - tear down sockets on exit.. via removing their inodes?
- * - simplify rx thread exit path (completion, etc)
- *
- * - move gsd into its own file
- * - move to userspace connection management?
- *
- */
-#include "warning_hack.h"
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/utsname.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#include <linux/version.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <linux/statfs.h>
-#include <linux/moduleparam.h>
-#endif
-#include <linux/blkdev.h>
-#include <linux/proc_fs.h>
-#include <linux/file.h>
-
-#include <asm/uaccess.h>
-
-#include "dlm_compat.h"
-#include "util.h"
-#include "dlmcommon.h"
-
-
-#include "heartbeat.h"
-#include "tcp.h"
-#include "nodemanager.h"
-
-#if 1
-#define netprintk(x, arg...) printk("(tcp:%d)(%s:%d) " x, current->pid, __FUNCTION__, __LINE__, ##arg)
-#define netprintk0(x) printk("(tcp:%d)(%s:%d) " x, current->pid, __FUNCTION__, __LINE__)
-/* yeah, a little gross, but it gets the job done */
-#define __msg_fmt "[mag %u len %u typ %u status %d key %u num %u] "
-#define __msg_args __hdr->magic, __hdr->data_len, __hdr->msg_type, \
- __hdr->status, __hdr->key, __hdr->msg_num
-#define msgprintk(hdr, fmt, args...) do { \
- typeof(hdr) __hdr = (hdr); \
- printk(__msg_fmt fmt, __msg_args, args); \
-} while (0)
-#define msgprintk0(hdr, fmt) do { \
- typeof(hdr) __hdr = (hdr); \
- printk(__msg_fmt fmt, __msg_args); \
-} while (0)
-
-#else
-#define netprintk(x, arg...)
-#define netprintk0(x)
-#define msgprintk(hdr, fmt, args...)
-#define msgprintk0(hdr, fmt)
-#endif
-
-/* let's only pollute this unit with these ridiculous definitions */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#define sk_sleep sleep
-#define sk_state state
-#define sk_receive_queue receive_queue
-#define sk_reuse reuse
-#define sk_family family
-#define sk_type type
-#define sk_protocol protocol
-#define sk_callback_lock callback_lock
-#define sk_user_data user_data
-#define sk_data_ready data_ready
-#define sk_error_report error_report
-#endif
-
-struct socket *recv_sock = NULL;
-static u16 ip_version, ip_port;
-static struct inode *net_inode = NULL;
-static u16 net_node_num;
-
-char *gsd_handler_buf = NULL;
-
-
-/* all this state should eventually be brought up by object activation
- * and tied to that object rather than being globally valid at insmod */
-static spinlock_t net_handler_lock = SPIN_LOCK_UNLOCKED;
-static spinlock_t net_status_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(net_handlers);
-static LIST_HEAD(net_status_list);
-/* this lock is also grabbed from bh context, non-bh use _bh() locking */
-static spinlock_t net_active_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(net_active_list);
-
-static int net_recv_pid = -1;
-static struct task_struct *net_recv_task = NULL;
-static struct completion net_recv_complete;
-
-static inline void net_abort_status_return(net_status_ctxt *nsc)
-{
- spin_lock(&net_status_lock);
- list_del(&nsc->list);
- spin_unlock(&net_status_lock);
-}
-
-/////////////////////
-static void net_shutdown(void);
-static int net_startup(void);
-static int __init net_driver_entry (void);
-static int net_init_driver(void);
-static void __exit net_driver_exit (void);
-static int net_add_handler(net_msg_handler *nmh);
-static void net_remove_handlers(void);
-static void net_dump_and_close_sock(struct socket *sock, struct inode *inode);
-static int net_init_tcp_recv_sock(void);
-static int net_receive_thread(void *data);
-static int net_receive(void);
-static int net_accept_tcp_connections(void);
-static void net_release_tcp_sock(void);
-static int net_process_message(struct inode *inode, struct socket *sock,
- net_msg *hdr);
-static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
-
-int gsd_message_action(gsd_message *g);
-int gsd_message_handler(net_msg *msg, u32 len, void *data);
-void gsd_teardown(void);
-int gsd_setup(void);
-
-
-//////////////////////
-
-static void net_get_handler(net_msg_handler *nmh)
-{
- atomic_inc(&nmh->refcnt);
-}
-
-/* called with net_handler_lock held so we can verify the flags :/ */
-static void __net_put_handler(net_msg_handler *nmh)
-{
- if (atomic_dec_and_test(&nmh->refcnt))
- kfree(nmh);
-}
-
-static void net_put_handler(net_msg_handler *nmh)
-{
- spin_lock(&net_handler_lock);
- __net_put_handler(nmh);
- spin_unlock(&net_handler_lock);
-}
-
-
-DECLARE_MUTEX(net_state_lock);
-u32 net_driver_state = NET_DRIVER_UNINITED;
-u32 net_num_dispatched = 0;
-
-
-/*
- * net_driver_entry()
- *
- * Driver entry point. Called on insmod.
- */
-static int __init net_driver_entry (void)
-{
- struct proc_dir_entry *de;
- de = proc_mkdir("cluster/net", 0);
- if (!de)
- return -1;
- de->proc_fops->ioctl = net_ioctl;
-
- netprintk0("Loaded net Driver module\n");
- return 0;
-} /* net_driver_entry */
-
-static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
-{
- net_ioc data;
- gsd_ioc gsd_data;
- int ret = 0;
- gsd_message g;
- int response = 0;
- struct inode *to = NULL;
- struct file *file = NULL;
-
- if (_IOC_TYPE (cmd) != NET_IOC_MAGIC) {
- ret = -ENOTTY;
- goto exit_ioctl;
- }
-
- switch (cmd) {
- case NET_IOC_ACTIVATE:
- memset(&data, 0, sizeof(net_ioc));
- down(&net_state_lock);
- data.status = net_driver_state;
- if (net_driver_state == NET_DRIVER_UNINITED) {
- ret = net_init_driver();
- if (ret < 0) {
- netprintk("error trying to activate net driver: %d\n", ret);
- data.status = NET_DRIVER_UNINITED;
- } else {
- netprintk0("activated net driver!\n");
- net_driver_state = data.status = NET_DRIVER_READY;
- }
- }
- up(&net_state_lock);
-
- ret = copy_to_user ((net_ioc *) arg, &data,
- sizeof (net_ioc));
- break;
- case NET_IOC_GETSTATE:
- memset(&data, 0, sizeof(net_ioc));
- down(&net_state_lock);
- data.status = net_driver_state;
- up(&net_state_lock);
- ret = copy_to_user ((net_ioc *) arg, &data,
- sizeof (net_ioc));
- break;
-
- case GSD_IOC_CREATE_GROUP:
- memset(&gsd_data, 0, sizeof(gsd_ioc));
- ret = copy_from_user(&gsd_data, (gsd_ioc *)arg, sizeof(gsd_ioc));
-
- file = fget(gsd_data.fd);
- if (!file || !file->f_dentry || !file->f_dentry->d_inode) {
- ret = -EINVAL;
- break;
- }
- to = file->f_dentry->d_inode;
-
- g.action = GSD_ACTION_ADD_GROUP;
- g.from = net_node_num;
- g.namelen = gsd_data.namelen;
- memcpy(g.name, gsd_data.name, gsd_data.namelen);
-
- if (to == net_inode) {
- /* create the group locally */
- ret = gsd_message_action(&g);
- } else {
- /* create the group on remote node */
- gsd_message_to_net(&g);
- ret = net_send_message(GSD_MESSAGE, 0, &g, sizeof(g), to, &response);
- if (ret == 0)
- ret = response;
- }
-
- memset(&gsd_data, 0, sizeof(gsd_ioc));
- gsd_data.status = ret;
- ret = copy_to_user((gsd_ioc *)arg, &gsd_data, sizeof(gsd_ioc));
- break;
-
- case GSD_IOC_ADD_GROUP_NODE:
- memset(&gsd_data, 0, sizeof(gsd_ioc));
- ret = copy_from_user(&gsd_data, (gsd_ioc *)arg, sizeof(gsd_ioc));
-
- file = fget(gsd_data.fd);
- if (!file || !file->f_dentry || !file->f_dentry->d_inode) {
- ret = -EINVAL;
- break;
- }
- to = file->f_dentry->d_inode;
-
- g.action = GSD_ACTION_ADD_GROUP_NODE;
- g.from = net_node_num;
- g.namelen = gsd_data.namelen;
- memcpy(g.name, gsd_data.name, gsd_data.namelen);
-
- if (to == net_inode) {
- /* create the group locally */
- ret = gsd_message_action(&g);
- } else {
- /* create the group on remote node */
- gsd_message_to_net(&g);
- ret = net_send_message(GSD_MESSAGE, 0, &g, sizeof(g), to, &response);
- if (ret == 0)
- ret = response;
- }
- memset(&gsd_data, 0, sizeof(gsd_ioc));
- gsd_data.status = ret;
- ret = copy_to_user((gsd_ioc *)arg, &gsd_data, sizeof(gsd_ioc));
- break;
- default:
- ret = -ENOTTY;
- break;
- }
-
-exit_ioctl:
-
- if (file)
- fput(file);
-
- return ret;
-} /* net_ioctl */
-
-static int net_init_driver(void)
-{
- nm_node_info *info;
- nm_node_inode_private *priv;
-
- /* get the global node number for this node */
- net_node_num = nm_this_node(NULL);
- if (net_node_num >= NM_MAX_NODES) {
- netprintk0("local nm node number not initialized!\n");
- return -1;
- }
- net_inode = nm_get_node_by_num(net_node_num);
- if (!net_inode) {
- netprintk0("local nm node inode not initialized!\n");
- return -1;
- }
- priv = (nm_node_inode_private *)net_inode->u.generic_ip;
- if (!priv) {
- iput(net_inode);
- netprintk0("local nm node info not initialized!\n");
- return -1;
- }
- info = &priv->node;
- ip_version = info->ifaces[0].ip_version;
- ip_port = info->ifaces[0].ip_port;
-
- if (net_startup() < 0)
- return -1;
-
- if (gsd_setup() < 0)
- return -1;
-
- return 0;
-} /* net_init_driver*/
-
-
-/*
- * net_driver_exit()
- *
- * Called on rmmod
- */
-static void __exit net_driver_exit (void)
-{
- down(&net_state_lock);
- if (net_driver_state == NET_DRIVER_READY) {
- netprintk0("shutting down network\n");
- net_shutdown();
- netprintk0("removing all net driver handlers\n");
- net_remove_handlers();
- gsd_teardown();
- if (net_inode)
- iput(net_inode);
- net_driver_state = NET_DRIVER_UNINITED;
- }
- up(&net_state_lock);
- remove_proc_entry("cluster/net", NULL);
- netprintk0("Unloading net driver module\n");
- return;
-} /* net_driver_exit */
-
-
-static int net_startup(void)
-{
- net_recv_pid = -1;
- net_recv_task = NULL;
- init_completion (&net_recv_complete);
-
- netprintk0("starting net receive thread...\n");
- net_recv_pid = kernel_thread (net_receive_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (net_recv_pid < 0) {
- netprintk("unable to launch net receive thread, error=%d\n",
- net_recv_pid);
- net_shutdown();
- return -EINVAL;
- }
-
- netprintk0("net thread running...\n");
- return 0;
-}
-
-static void net_shutdown(void)
-{
- netprintk ("waiting for net thread to exit....\n");
- send_sig (SIGINT, net_recv_task, 0);
- wait_for_completion (&net_recv_complete);
- netprintk ("net thread exited\n");
-}
-
-static int net_rx_should_wake(void)
-{
- int empty;
-
- spin_lock_bh(&net_active_lock);
- empty = list_empty(&net_active_list);
- spin_unlock_bh(&net_active_lock);
-
- return !empty || tcp_sk(recv_sock->sk)->accept_queue;
-}
-
-static int net_receive_thread(void *data)
-{
- int status;
- DECLARE_WAITQUEUE(main_wait, current);
-
- util_daemonize ("netrecv", strlen("netrecv"), 1);
- net_recv_task = current;
-
- status = net_init_tcp_recv_sock();
- if (status >= 0 && recv_sock) {
- add_wait_queue_exclusive(recv_sock->sk->sk_sleep, &main_wait);
- while (1) {
- net_accept_tcp_connections();
- net_receive();
-
- wait_event_interruptible(*recv_sock->sk->sk_sleep,
- net_rx_should_wake());
-
- if (signal_pending(current)) {
- netprintk0("net recv thread got signal!\n");
- break;
- }
- }
- remove_wait_queue(recv_sock->sk->sk_sleep, &main_wait);
- } else {
- netprintk0("failed to initialize net_thread!\n");
- }
-
- /* Flush all scheduled tasks */
- flush_scheduled_work();
- net_release_tcp_sock();
- net_recv_task = NULL;
- complete (&net_recv_complete);
- return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/* for lack of a better place to do this */
-
-int gsd_setup()
-{
- int ret;
- /* need this stupidity until I can divorce the actual nm actions
- * from the output they send to their user buffer */
- gsd_handler_buf = (char *) __get_free_page(GFP_KERNEL);
- if (!gsd_handler_buf)
- return -ENOMEM;
-
- ret = net_register_handler(GSD_MESSAGE, 0, 0, sizeof(gsd_message),
- gsd_message_handler, NULL);
-
- return ret;
-}
-
-void gsd_teardown()
-{
- free_page((unsigned long)gsd_handler_buf);
-}
-
-int gsd_message_handler(net_msg *msg, u32 len, void *data)
-{
- gsd_message *g = (gsd_message *)msg->buf;
- gsd_message_to_host(g);
- return gsd_message_action(g);
-}
-
-int gsd_message_action(gsd_message *g)
-{
- int ret;
- nm_op op;
- int namelen = g->namelen;
- struct inode *node=NULL, *group=NULL;
- char name[NM_MAX_NAME_LEN+1];
-
- if (namelen > NM_MAX_NAME_LEN)
- return -EINVAL;
- strncpy(name, g->name, namelen);
- name[namelen] = '\0';
-
- memset(&op, 0, sizeof(op));
- switch (g->action) {
- case GSD_ACTION_ADD_GROUP:
- group = nm_get_group_by_name(name);
- if (group) {
- ret = 0;
- break;
- }
- op.arg_u.gc.group_num = NM_INVALID_SLOT_NUM;
- memcpy(op.arg_u.gc.name, name, namelen);
- memcpy(op.arg_u.gc.disk_uuid, name, namelen);
-
- ret = nm_create_group(gsd_handler_buf, &op);
- if (ret >= 0)
- ret = 0;
- break;
-
- case GSD_ACTION_ADD_GROUP_NODE:
- group = nm_get_group_by_name(name);
- if (!group) {
- ret = -EINVAL;
- break;
- }
- node = nm_get_group_node_by_index(group, g->from);
- if (node) {
- ret = 0;
- if (nm_get_node_global_index(node) != g->from)
- ret = -EINVAL;
- break;
- }
- op.arg_u.gc.group_num = nm_get_group_global_index(group);
- op.arg_u.gc.node_num = g->from;
- op.arg_u.gc.slot_num = g->from;
- ret = nm_add_node_to_group(gsd_handler_buf, &op);
- if (ret >= 0)
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- if (node)
- iput(node);
- if (group)
- iput(group);
- return ret;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-/* max_len is protection for the handler func. incoming messages won't
- * be given to the handler if their payload is longer than the max. */
-int net_register_handler(u32 msg_type, u32 key, int flags, u32 max_len,
- net_msg_handler_func *func, void *data)
-{
- net_msg_handler *nmh, *found=NULL;
- int ret;
-
- if (max_len > NET_MAX_PAYLOAD_BYTES) {
- netprintk("max_len for message handler out of range: %u\n",
- max_len);
- return -EINVAL;
- }
-
- if (!msg_type) {
- netprintk("no message type provided: %u, %p\n", msg_type, func);
- return -EINVAL;
-
- }
- if (!func) {
- netprintk("no message handler provided: %u, %p\n",
- msg_type, func);
- return -EINVAL;
- }
-
- nmh = kmalloc(sizeof(net_msg_handler), GFP_KERNEL);
- if (!nmh) {
- return -ENOMEM;
- }
- memset(nmh, 0, sizeof(net_msg_handler));
- nmh->func = func;
- nmh->data = data;
- nmh->msg_type = msg_type;
- nmh->max_len = max_len;
- nmh->key = key;
- spin_lock_init(&nmh->lock);
- atomic_set(&nmh->refcnt, 0);
- nmh->flags = flags;
- INIT_LIST_HEAD(&nmh->list);
- net_get_handler(nmh);
-
- ret = net_add_handler(nmh);
- if (ret) {
- net_put_handler(found);
- netprintk("message handler for type %u, key %u already exists!!!\n",
- msg_type, key);
- }
- return ret;
-}
-EXPORT_SYMBOL(net_register_handler);
-
-
-
-/* net_handler_lock should be held */
-static net_msg_handler * __net_lookup_handler(u32 msg_type, u32 key)
-{
- net_msg_handler *ret;
- struct list_head *iter;
-
- list_for_each(iter, &net_handlers) {
- ret = list_entry(iter, net_msg_handler, list);
- if (ret->msg_type == msg_type && ret->key == key) {
- net_get_handler(ret);
- return ret;
- }
- }
- return NULL;
-}
-
-net_msg_handler * net_lookup_handler(u32 msg_type, u32 key)
-{
- net_msg_handler *ret;
-
- spin_lock(&net_handler_lock);
- ret = __net_lookup_handler(msg_type, key);
- spin_unlock(&net_handler_lock);
-
- return ret;
-}
-
-static int net_add_handler(net_msg_handler *nmh)
-{
- net_msg_handler *existing;
- int ret;
-
- spin_lock(&net_handler_lock);
-
- existing = __net_lookup_handler(nmh->msg_type, nmh->key);
- if (existing != NULL) {
- __net_put_handler(existing);
- ret = -EEXIST;
- goto out;
- }
-
- list_add_tail(&nmh->list, &net_handlers);
- ret = 0;
-
-out:
- spin_unlock(&net_handler_lock);
- return ret;
-}
-
-
-/* TODO Fix */
-static void net_remove_handlers(void)
-{
- /* TODO: make an iterator in nm for running over each global inode
- * do I have this already? then call destroy on each. last put
- * will do the work. doesnt matter if it's slow. this is only
- * on shutdown... */
-}
-
-static int net_recv_tcp_msg(struct inode *inode, struct socket *sock,
- void *data, size_t len)
-{
- nm_node_inode_private *priv;
- nm_node_info *node;
- int ret;
- mm_segment_t oldfs;
- struct iovec iov = {
- .iov_len = len,
- .iov_base = data
- };
- struct msghdr msg = {
- .msg_iovlen = 1,
- .msg_iov = &iov,
- .msg_flags = MSG_DONTWAIT,
- };
-
- priv = (nm_node_inode_private *)inode->u.generic_ip;
- node = &priv->node;
- if (!sock) {
- spin_lock(&priv->net.sock_lock);
- /* TODO: sock refcounting... i think we can get/put the sk */
- sock = priv->net.sock;
- spin_unlock(&priv->net.sock_lock);
- if (!sock) {
- ret = -EINVAL;
- goto out;
- }
- }
-
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
- set_fs(oldfs);
-
-out:
- return ret;
-}
-
-static int net_send_tcp_msg(struct inode *inode, struct socket *sock,
- struct iovec *iov, size_t iovlen, size_t total)
-{
- int ret;
- nm_node_inode_private *priv;
- nm_node_info *node;
- mm_segment_t oldfs;
- struct msghdr msg = {
- .msg_iov = iov,
- .msg_iovlen = iovlen,
- };
-
- priv = (nm_node_inode_private *)inode->u.generic_ip;
- node = &priv->node;
- if (!sock) {
- spin_lock(&priv->net.sock_lock);
- /* TODO: sock refcounting... i think we can get/put the sk */
- sock = priv->net.sock;
- spin_unlock(&priv->net.sock_lock);
- }
-
- netprintk("Sending msg to node=%u, name=%s\n",
- node->node_num, node->node_name);
-
- if (sock == NULL) {
- ret = -EINVAL;
- goto out;
- }
-
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_sendmsg(sock, &msg, total);
- set_fs(oldfs);
- if (ret != total) {
- netprintk("sendmsg returned %d instead of %zu\n", ret, total);
- if (ret >= 0)
- ret = -EINTR; /* should be smarter, I bet */
- goto out;
- }
-
- ret = 0;
-out:
- if (ret < 0)
- netprintk("returning error: %d\n", ret);
- return ret;
-}
-
-static u64 net_next_msg_num(void)
-{
- static spinlock_t net_msg_num_lock = SPIN_LOCK_UNLOCKED;
- static u64 net_msg_num = 1;
- u64 ret;
-
- spin_lock(&net_msg_num_lock);
- ret = net_msg_num++;
- spin_unlock(&net_msg_num_lock);
-
- return ret;
-}
-
-int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
- size_t caller_iovlen, struct inode *inode,
- int *status)
-{
- int ret, tmpret;
- net_msg *msg = NULL;
- net_status_ctxt nsc;
- wait_queue_t sleep;
- nm_node_inode_private *priv = NULL;
- net_inode_private *net = NULL;
- size_t i, iovlen, caller_bytes = 0;
- struct iovec *iov = NULL;
-
- if (!inode || !inode->u.generic_ip) {
- netprintk0("bad inode, cannot send message\n");
- ret = -EINVAL;
- goto done;
- }
- if (caller_iovlen == 0) {
- netprintk0("bad iovec array length\n");
- ret = -EINVAL;
- goto done;
- }
-
- for(i = 0; i < caller_iovlen; i++)
- caller_bytes += caller_iov[i].iov_len;
-
- if (caller_bytes > NET_MAX_PAYLOAD_BYTES) {
- netprintk("total payload len %zu too large\n", caller_bytes);
- ret = -EINVAL;
- goto done;
- }
-
- priv = (nm_node_inode_private *)inode->u.generic_ip;
- net = &priv->net;
- spin_lock(&net->sock_lock);
- if (!net->sock) {
- spin_unlock(&net->sock_lock);
- ret = net_init_tcp_sock(inode);
- if (!(ret == 0 || ret == -EEXIST)) {
- netprintk0("failed to create socket!\n");
- ret = -EINVAL;
- goto done;
- }
- spin_lock(&net->sock_lock);
- }
- spin_unlock(&net->sock_lock);
-
- /* build up our iovec */
- iovlen = caller_iovlen + 1;
- iov = kmalloc(sizeof(struct iovec) * iovlen, GFP_KERNEL);
- if (iov == NULL) {
- netprintk("failed to %zu element iovec!\n", iovlen);
- ret = -ENOMEM;
- goto done;
- }
-
- msg = kmalloc(sizeof(net_msg), GFP_KERNEL);
- if (!msg) {
- netprintk("failed to allocate a net_msg!\n");
- ret = -ENOMEM;
- goto done;
- }
- memset(msg, 0, sizeof(net_msg));
- msg->magic = NET_MSG_MAGIC;
- msg->data_len = caller_bytes;
- msg->msg_type = msg_type;
- msg->key = key;
- msg->msg_num = net_next_msg_num();
-
- iov[0].iov_len = sizeof(net_msg);
- iov[0].iov_base = msg;
- memcpy(&iov[1], caller_iov, caller_iovlen * sizeof(struct iovec));
-
- /* does the caller want to wait for a simple status? */
- if (status) {
- msg->status = 1;
-
- INIT_LIST_HEAD(&nsc.list);
- init_waitqueue_head(&nsc.wq);
- atomic_set(&nsc.woken, 0);
- nsc.msg_num = msg->msg_num;
- nsc.status = 0;
- spin_lock(&net_status_lock);
- list_add(&nsc.list, &net_status_list);
- spin_unlock(&net_status_lock);
-
- init_waitqueue_entry(&sleep, current);
- spin_lock(&net->sock_lock);
- if (!net->sock) {
- spin_unlock(&net->sock_lock);
- netprintk0("caller wanted status return but socket went away!\n");
- kfree(msg);
- return -EINVAL;
- }
- add_wait_queue(net->sock->sk->sk_sleep, &sleep);
- spin_unlock(&net->sock_lock);
- }
-
- /* finally, convert the message header to network byte-order and send */
- net_msg_to_net(msg);
- ret = net_send_tcp_msg(inode, NULL, iov, iovlen,
- sizeof(net_msg) + caller_bytes);
- net_msg_to_host(msg); /* just swapping for printk, its unused now */
- msgprintk(msg, "sending returned %d\n", ret);
-
- if (status) {
- if (ret >= 0) {
- /* wait on other node's handler */
- tmpret = util_wait_atomic_eq(&nsc.wq, &nsc.woken, 1, 0);
- if (tmpret==0) {
- *status = nsc.status;
- netprintk("status return requested, status is %d\n", *status);
- } else {
- ret = tmpret;
- net_abort_status_return(&nsc);
- netprintk0("net_abort_status_return called\n");
- netprintk("status return requested, and error occurred while waiting=%d\n", ret);
- *status = ret;
- }
- remove_wait_queue(recv_sock->sk->sk_sleep, &sleep);
- } else {
- netprintk("status return requested, and error returned from net_send_tcp_msg=%d\n", ret);
- /* return bad status right away */
- *status = ret;
- }
- } else if (ret < 0) {
- netprintk("no status return requested, but error returned from net_send_tcp_msg=%d\n", ret);
- }
-
-done:
- if (iov)
- kfree(iov);
- if (msg)
- kfree(msg);
- return ret;
-}
-EXPORT_SYMBOL(net_send_message_iov);
-
-/*
- * net_send_message
- *
- * - this is probably the function you are looking for
- * - it will package up the message for you, verifying that
- * the message handler is there and the length is ok,
- * connect to the other node if there is not already a
- * socket for it, and optionally wait on a status return
- * from the other node
- * - all you need prior to this call is to have inited the
- * net stuff, to have a valid inode for the node to contact
- * in nm, and to have registered the message handler
- * - if status was requested, it will be returned to the caller
- * already converted to host byteorder
- */
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
- struct inode *inode, int *status)
-{
- struct iovec iov = {
- .iov_base = data,
- .iov_len = len,
- };
- return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
-}
-EXPORT_SYMBOL(net_send_message);
-
-static int net_send_status_magic(struct inode *inode, struct socket *sock,
- net_msg *hdr, int err)
-{
- struct iovec iov = {
- .iov_base = hdr,
- .iov_len = sizeof(net_msg),
- };
-
- /* leave other fields intact from the incoming message, msg_num
- * in particular */
- hdr->status = err;
- hdr->magic = NET_MSG_STATUS_MAGIC; // twiddle the magic
- hdr->data_len = 0;
-
- msgprintk(hdr, "about to send status magic %d\n", err);
- /* hdr has been in host byteorder this whole time */
- net_msg_to_net(hdr);
- return net_send_tcp_msg(inode, sock, &iov, 1, sizeof(net_msg));
-}
-
-static inline int net_is_valid_error_type(u32 err_type)
-{
- if (err_type == NET_ALREADY_CONNECTED ||
- err_type == NET_UNKNOWN_HOST)
- return 1;
- return 0;
-}
-
-static void net_send_error(struct inode *inode, struct socket *sock,
- u16 err_type)
-{
- net_msg hdr = {
- .magic = NET_MSG_MAGIC,
- .msg_type = err_type,
- .data_len = 0,
- };
- struct iovec iov = {
- .iov_base = &hdr,
- .iov_len = sizeof(hdr),
- };
-
- if (!net_is_valid_error_type(err_type)) {
- netprintk("bug! bad error type! %u\n", err_type);
- goto out;
- }
-
- msgprintk(&hdr, "about to send error %u\n", err_type);
- net_msg_to_net(&hdr);
- net_send_tcp_msg(inode, sock, &iov, 1, sizeof(net_msg));
-out:
- return;
-}
-
-static void net_got_sock_callback(net_inode_private *net, struct sock *sk)
-{
- BUG_ON(net == NULL);
- BUG_ON(net->sock == NULL);
- BUG_ON(net->sock->sk != sk);
-
- spin_lock(&net_active_lock);
- if (list_empty(&net->active_item))
- list_add_tail(&net->active_item, &net_active_list);
- spin_unlock(&net_active_lock);
-
- if (recv_sock != NULL)
- wake_up(recv_sock->sk->sk_sleep);
-}
-
-static void net_data_ready(struct sock *sk, int bytes)
-{
- net_inode_private *net = sk->sk_user_data;
- void (*ready)(struct sock *sk, int bytes);
-
- read_lock(&sk->sk_callback_lock);
- net_got_sock_callback(net, sk);
- ready = net->orig_data_ready;
- read_unlock(&sk->sk_callback_lock);
-
- ready(sk, bytes);
-
-}
-static void net_error_report(struct sock *sk)
-{
- net_inode_private *net = sk->sk_user_data;
- void (*report)(struct sock *sk);
-
- read_lock(&sk->sk_callback_lock);
- net_got_sock_callback(net, sk);
- report = net->orig_error_report;
- read_unlock(&sk->sk_callback_lock);
-
- report(sk);
-}
-
-static int net_receive(void)
-{
- struct inode *inode;
- LIST_HEAD(snapshot_list);
- nm_node_inode_private *priv;
- net_inode_private *net;
- struct socket *sock;
- net_msg *hdr;
- int err = 0, read_eagain;
- void *data;
- size_t datalen;
-
- /* process in batches so that the receive thread gets
- * a chance to accept new sockets now and again */
- spin_lock_bh(&net_active_lock);
- list_splice_init(&net_active_list, &snapshot_list);
- spin_unlock_bh(&net_active_lock);
-
- /* we don't need locks to test our list because we're the
- * only people who remove active_items from lists */
- while (!list_empty(&snapshot_list)) {
- net = list_entry(snapshot_list.next, net_inode_private,
- active_item);
-
- /* remove the net from the active list so that data_ready
- * can put it back on if it hits just after we read */
- spin_lock_bh(&net_active_lock);
- list_del_init(&net->active_item);
- spin_unlock_bh(&net_active_lock);
-
- priv = container_of(net, nm_node_inode_private, net);
- inode = priv->inode;
- sock = net->sock;
- BUG_ON(sock == NULL); /* real refcounting, please! */
-
- err = 0;
- read_eagain = 0;
-
- /* do we need more header? */
- if (net->page_off < sizeof(net_msg)) {
- data = page_address(net->page) + net->page_off;
- datalen = sizeof(net_msg) - net->page_off;
- err = net_recv_tcp_msg(inode, sock, data, datalen);
- if (err > 0) {
- net->page_off += err;
- /* only swab incoming here.. we can
- * only get here once as we cross from
- * being under to over */
- if (net->page_off == sizeof(net_msg)) {
- hdr = page_address(net->page);
- net_msg_to_host(hdr);
- if (hdr->data_len > NET_MAX_PAYLOAD_BYTES)
- err = -EOVERFLOW;
- }
- }
- if (err < 0) {
- if (err == -EAGAIN)
- read_eagain = 1;
- goto done;
- }
- }
-
- if (net->page_off < sizeof(net_msg)) {
- /* oof, still don't have a header */
- goto done;
- }
-
- /* this was swabbed above when we first read it */
- hdr = page_address(net->page);
-
- msgprintk(hdr, "at page_off %zu\n", net->page_off);
-
- /* do we need more payload? */
- if (net->page_off - sizeof(net_msg) < hdr->data_len) {
- /* need more payload */
- data = page_address(net->page) + net->page_off;
- datalen = (sizeof(net_msg) + hdr->data_len) -
- net->page_off;
- err = net_recv_tcp_msg(inode, sock, data, datalen);
- if (err > 0)
- net->page_off += err;
- if (err < 0) {
- if (err == -EAGAIN)
- read_eagain = 1;
- goto done;
- }
- }
-
- if (net->page_off - sizeof(net_msg) == hdr->data_len) {
- /* whooo peee, we have a full message */
- /* after calling this the message is toast */
- err = net_process_message(inode, sock, hdr);
- net->page_off = 0;
- }
-
-done:
- /* we might not have consumed all the data that has been
- * announced to us through data_ready.. keep the net active
- * as long as there may still be remaining data.
- * data_ready might have been called after we saw eagain */
- spin_lock_bh(&net_active_lock);
- if (!read_eagain && list_empty(&net->active_item))
- list_add_tail(&net->active_item, &net_active_list);
- spin_unlock_bh(&net_active_lock);
-
- netprintk("net %p finished reading with %d\n", net, err);
- if (err < 0 && err != -EAGAIN) {
- netprintk("socket saw err %d, closing\n", err);
- net_dump_and_close_sock(sock, inode);
- }
- }
-
- return 0;
-}
-
-
-static void net_do_status_return(net_msg *hdr)
-{
- net_status_ctxt *nsc = NULL;
- struct list_head *iter;
-
- spin_lock(&net_status_lock);
- list_for_each(iter, &net_status_list) {
- nsc = list_entry(iter, net_status_ctxt, list);
- if (nsc->msg_num == hdr->msg_num) {
- nsc->status = hdr->status;
- atomic_set(&nsc->woken, 1);
- list_del(&nsc->list);
- wake_up(&nsc->wq);
- break;
- }
- nsc = NULL;
- }
- spin_unlock(&net_status_lock);
-
- msgprintk(hdr, "sent to nsc %p\n", nsc);
-}
-
-/* this returns -errno if the header was unknown or too large, etc.
- * after this is called the buffer us reused for the next message */
-static int net_process_message(struct inode *inode, struct socket *sock,
- net_msg *hdr)
-{
- int ret;
- net_msg_handler *hnd = NULL;
-
- netprintk("received message header... magic=%u type=%u key=%u\n",
- hdr->magic, hdr->msg_type, hdr->key);
-
- if (hdr->magic == NET_MSG_STATUS_MAGIC) {
- /* special type for returning message status */
- net_do_status_return(hdr);
- ret = 0;
- goto out;
- } else if (hdr->magic != NET_MSG_MAGIC) {
- msgprintk0(hdr, "bad magic\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (net_is_valid_error_type(hdr->msg_type)) {
- if (hdr->msg_type == NET_ALREADY_CONNECTED) {
- msgprintk0(hdr, "error: there is already a socket "
- "for this connection\n");
- } else if (hdr->msg_type == NET_UNKNOWN_HOST) {
- msgprintk0(hdr, "error: unknown host\n");
- }
- ret = 0;
- goto out;
- }
-
- /* find a handler for it */
- hnd = net_lookup_handler(hdr->msg_type, hdr->key);
- if (!hnd) {
- ret = -EINVAL;
- msgprintk0(hdr, "no handler for message.\n");
- goto out;
- }
-
- ret = 0;
- spin_lock(&hnd->lock);
- if (hdr->data_len > hnd->max_len)
- ret = -EOVERFLOW;
- spin_unlock(&hnd->lock);
- if (ret) {
- msgprintk(hdr, "advertised data_len > handlers max_len (%u)\n",
- hnd->max_len);
- goto out;
- }
-
- net_num_dispatched++;
- ret = (hnd->func)(hdr, sizeof(net_msg) + hdr->data_len, hnd->data);
-
- /* if node has requested status return, do it now */
- if (hdr->status) {
- int tmpret;
- /* this destroys the hdr, so don't use it after this */
- tmpret = net_send_status_magic(inode, sock, hdr, ret);
- hdr = NULL;
- netprintk("sending status %d returned %d\n", ret, tmpret);
- ret = 0;
- } else if (ret < 0) {
- msgprintk(hdr, "dispatch returned %d\n", ret);
- }
-
-out:
- if (hnd)
- net_put_handler(hnd);
- return ret;
-}
-
-static void net_record_new_sock(net_inode_private *net)
-{
- struct sock *sk;
-
- BUG_ON(net->sock == NULL);
- BUG_ON(net->sock->sk == NULL);
-
- netprintk("added net %p to net_active_list\n", net);
-
- sk = net->sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
- if (sk->sk_user_data != net) {
- net->orig_data_ready = sk->sk_data_ready;
- net->orig_error_report = sk->sk_error_report;
-
- sk->sk_user_data = net;
- sk->sk_data_ready = net_data_ready;
- sk->sk_error_report = net_error_report;
- }
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* record it as active initially to make sure we didn't miss
- * any incoming data while we were setting it up */
- spin_lock_bh(&net_active_lock);
- if (list_empty(&net->active_item))
- list_add_tail(&net->active_item, &net_active_list);
- spin_unlock_bh(&net_active_lock);
-
- if (recv_sock != NULL)
- wake_up(recv_sock->sk->sk_sleep);
-}
-
-/*
- * net_accept_tcp_connections()
- *
- */
-static int net_accept_tcp_connections(void)
-{
- int error, slen;
- struct sockaddr_in sin;
- struct socket *sock;
- struct inode *inode;
-
- if (!recv_sock) {
- netprintk0("no socket!\n");
- return 0;
- }
-
- if (!tcp_sk(recv_sock->sk)->accept_queue) {
- //netprintk0("no connections on the queue\n");
- return 0;
- }
- error = 0;
- while (error >= 0) {
- error = sock_create_lite(recv_sock->sk->sk_family,
- recv_sock->sk->sk_type,
- recv_sock->sk->sk_protocol,
- &sock);
- if (error)
- break;
-
- sock->type = recv_sock->type;
- sock->ops = recv_sock->ops;
- error = recv_sock->ops->accept(recv_sock, sock, O_NONBLOCK);
- if (error < 0) {
- sock_release(sock);
- break;
- }
- if (sock->sk->sk_state == TCP_CLOSE) {
- sock_release(sock);
- continue;
- }
-
- tcp_sk(recv_sock->sk)->nonagle = 1;
-
- slen = sizeof(sin);
- error = sock->ops->getname(sock, (struct sockaddr *) &sin, &slen, 1);
- if (error < 0)
- break;
-
- netprintk("attempt to connect from %u.%u.%u.%u:%04x\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-
- inode = nm_get_node_by_ip(sin.sin_addr.s_addr);
- if (inode) {
- int exists = 1;
- nm_node_inode_private *priv = inode->u.generic_ip;
- net_inode_private *net = NULL;
-
- if (priv) {
- net = &priv->net;
- netprintk("connect from known host: %s\n",
- priv->node.node_name);
- if (ntohs(sin.sin_port) >= 1024)
- netprintk("warning: connect from unprivileged port: %u.%u.%u.%u:%d\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- spin_lock(&priv->net.sock_lock);
- if (!priv->net.sock) {
- netprintk("new sock, giving net %p sock %p\n", net, sock);
- exists = 0;
- priv->net.sock = sock;
-
- }
- spin_unlock(&priv->net.sock_lock);
-
- if (exists) {
- netprintk0("already a socket for this connection!\n");
- net_send_error(inode, sock, NET_ALREADY_CONNECTED);
- net_dump_and_close_sock(sock, inode);
- } else {
- net_record_new_sock(net);
- }
- }
-
- iput(inode);
- } else {
- netprintk0("connect from unknown host...\n");
- net_send_error(inode, sock, NET_UNKNOWN_HOST);
- net_dump_and_close_sock(sock, inode);
- }
- }
- return error;
-}
-
-static void net_dump_and_close_sock(struct socket *sock, struct inode *inode)
-{
- nm_node_inode_private *priv = NULL;
- struct msghdr msg;
- struct iovec iov;
- int len;
- mm_segment_t oldfs;
-
- if (sock->sk) {
- if (inode)
- priv = inode->u.generic_ip;
- if (!priv)
- goto release;
-
- len = 1;
- while (len>0)
- {
- msg.msg_name = 0;
- msg.msg_namelen = 0;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = MSG_DONTWAIT;
- msg.msg_iov->iov_base = page_address(priv->net.page);
- msg.msg_iov->iov_len = (__kernel_size_t)PAGE_SIZE;
-
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- len = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
- set_fs(oldfs);
-
- if (!len)
- break;
- }
- }
-
- if (sock->sk) {
- if (inode) {
- priv = inode->u.generic_ip;
- if (priv) {
- spin_lock(&priv->net.sock_lock);
- priv->net.sock = NULL;
- spin_unlock(&priv->net.sock_lock);
- }
- }
- }
-release:
- sock_release(sock);
-}
-
-/* this is racey beyond reason, the userspace work will involve some tracking
- * structures that senders can wait on or time out on for connections
- * to happen */
-int net_init_tcp_sock(struct inode *inode)
-{
- nm_node_inode_private *priv;
- nm_node_info *node;
- net_inode_private *net = NULL;
- struct sockaddr_in myaddr, remoteaddr;
- int err = -EINVAL;
- int i;
- struct sock *sk;
- struct socket *sock = NULL;
-
- priv = inode->u.generic_ip;
- if (!priv) {
- netprintk0("bad inode\n");
- return -EINVAL;
- }
- net = &priv->net;
- node = &priv->node;
-
- if ((err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- netprintk("can't create socket: err=%d\n", err);
- return err;
- }
-
- spin_lock(&net->sock_lock);
- if (net->sock || net->flags & NET_FLAG_CREATING_SOCKET) {
- netprintk("socket already created or creating for inode %lu\n", inode->i_ino);
- spin_unlock(&net->sock_lock);
- sock_release(sock);
- return -EEXIST;
- }
- net->flags |= NET_FLAG_CREATING_SOCKET;
- spin_unlock(&net->sock_lock);
-
- memset(&myaddr, 0, sizeof(myaddr));
- myaddr.sin_family = AF_INET;
- myaddr.sin_port = htons(0); // any port
- err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr));
-
- memset (&remoteaddr, 0, sizeof (remoteaddr));
- remoteaddr.sin_family = net_ip_version_to_family(node->ifaces[0].ip_version);
- remoteaddr.sin_addr.s_addr = node->ifaces[0].addr_u.ip_addr4;
- remoteaddr.sin_port = node->ifaces[0].ip_port;
-
- //netprintk("connecting new socket: ip %d.%d.%d.%d, port %d\n", NIPQUAD(remoteaddr.sin_addr.s_addr), remoteaddr.sin_port);
- err = sock->ops->connect(sock, (struct sockaddr *) &remoteaddr,
- sizeof(remoteaddr), 0); /* TODO put this back! O_NONBLOCK); */
- //netprintk("connect status %d\n", err);
-
- if (err >= 0) {
- spin_lock(&net->sock_lock);
- net->sock = sock;
- net->flags &= ~NET_FLAG_CREATING_SOCKET;
- spin_unlock(&net->sock_lock);
- goto out;
- }
-
- sk = sock->sk;
- switch (err) {
- case -EALREADY:
- case -EINPROGRESS:
-
- /* TODO: awful awful awful */
- for (i=0; i<100; i++) {
- /* Protect against TCP socket state changes */
- lock_sock(sk);
- if (sk->sk_state == TCP_ESTABLISHED) {
- release_sock(sk);
- netprintk0("woo! connected...\n");
- err = 0;
- spin_lock(&net->sock_lock);
- net->flags &= ~NET_FLAG_CREATING_SOCKET;
- net->sock = sock;
- spin_unlock(&net->sock_lock);
- break;
- } else {
- netprintk("waiting for connection: pass %d, state %d\n", i, sk->sk_state);
- /* TODO */
-#if 0
- task->tk_timeout = RPC_CONNECT_TIMEOUT;
- /* if the socket is already closing, delay briefly */
- if ((1<<sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
- task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
- rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
-#endif
- /* TODO: this is awful... change it later */
- }
- release_sock(sk);
- util_sleep(100);
- }
- break;
- case -ECONNREFUSED:
- case -ECONNRESET:
- case -ENOTCONN:
- netprintk("conn refused, reset or not connected\n");
- break;
- default:
- /* Report myriad other possible returns. If this file
- * system is soft mounted, just error out, like Solaris. */
- netprintk("error %d connecting to server\n", err);
- /* TODO */
-#if 0
- /* This will prevent anybody else from connecting */
- rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
- task->tk_status = status;
-#endif
- break;
- }
-
-out:
- if (err < 0) {
- if (net) {
- spin_lock(&net->sock_lock);
- if (net->sock)
- netprintk0("wha?! there's a socket there already!!!!\n");
- net->flags &= ~NET_FLAG_CREATING_SOCKET;
- spin_unlock(&net->sock_lock);
- }
- if (sock)
- sock_release(sock);
- } else {
- net_record_new_sock(net);
- }
-
- return err;
-}
-
-
-
-/*
- * net_init_tcp_recv_sock()
- *
- */
-static int net_init_tcp_recv_sock(void)
-{
- struct sockaddr_in sin;
- int status = -EINVAL;
-
- /* Create Receive Socket */
- status = sock_create(net_ip_version_to_family(ip_version),
- SOCK_STREAM, IPPROTO_TCP,
- &recv_sock);
- if (status < 0) {
- netprintk ("unable to create socket, error=%d\n", status);
- goto bail;
- }
-
-
- /* Bind Receive Socket */
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = net_ip_version_to_family(ip_version);
- sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = ip_port;
-
- status = recv_sock->ops->bind(recv_sock,
- (struct sockaddr *)&sin,
- sizeof(sin));
- if (status < 0) {
- netprintk ("unable to bind socket to port %d, error=%d\n",
- ntohs(ip_port), status);
- }
-
- /* !!! dunno about these... */
- recv_sock->sk->sk_reuse = 1;
- status = recv_sock->ops->listen(recv_sock, 64);
-
-bail:
- return status;
-} /* net_init_tcp_recv_sock */
-
-
-static void net_release_tcp_sock(void)
-{
- if (recv_sock) {
- sock_release (recv_sock);
- recv_sock = NULL;
- }
-}
-
-MODULE_LICENSE("GPL");
-module_init (net_driver_entry);
-module_exit (net_driver_exit);
Copied: trunk/fs/ocfs2/cluster/tcp.c (from rev 1811, trunk/cluster/tcp.c)
Deleted: trunk/fs/ocfs2/cluster/tcp.h
===================================================================
--- trunk/cluster/tcp.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/tcp.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,205 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * tcp.h
- *
- * Function prototypes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_TCP_H
-#define CLUSTER_TCP_H
-
-#include <linux/socket.h>
-#ifdef __KERNEL__
-#include <net/sock.h>
-#include <linux/tcp.h>
-#else
-#include <sys/socket.h>
-#endif
-#include <linux/inet.h>
-#include <linux/in.h>
-
-#include "nodemanager.h"
-#include "ocfs2_tcp.h"
-
-
-#define NET_DISP_THREAD_MS 5000 /* TODO */
-#define NET_RECV_THREAD_MS 5000 /* TODO */
-
-#define NET_MSG_MAGIC ((u16)0xfa55)
-#define NET_MSG_STATUS_MAGIC ((u16)0xfa56)
-typedef struct _net_msg
-{
- __u16 magic;
- __u16 data_len;
- __u16 msg_type;
- __s16 status;
- __u32 key;
- __u32 msg_num;
- __u8 buf[0];
-} net_msg;
-
-static inline void net_msg_to_net(net_msg *m)
-{
- m->magic = htons(m->magic);
- m->data_len = htons(m->data_len);
- m->msg_type = htons(m->msg_type);
- m->status = htons(m->status);
- m->key = htonl(m->key);
- m->msg_num = htonl(m->msg_num);
-}
-static inline void net_msg_to_host(net_msg *m)
-{
- m->magic = ntohs(m->magic);
- m->data_len = ntohs(m->data_len);
- m->msg_type = ntohs(m->msg_type);
- m->status = ntohs(m->status);
- m->key = ntohl(m->key);
- m->msg_num = ntohl(m->msg_num);
-}
-
-typedef int (net_msg_handler_func)(net_msg *msg, u32 len, void *data);
-
-typedef struct _net_msg_handler
-{
- struct list_head list;
- u32 msg_type;
- u32 key;
- net_msg_handler_func *func;
- void *data;
- u32 max_len;
- spinlock_t lock;
- atomic_t refcnt;
- int flags;
-} net_msg_handler;
-
-typedef struct _net_status_ctxt
-{
- struct list_head list;
- s32 status;
- u64 msg_num;
- wait_queue_head_t wq;
- atomic_t woken;
-} net_status_ctxt;
-
-#define NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg))
-
-/* RESERVED */
-#define NET_ALREADY_CONNECTED (0xfff0)
-#define NET_UNKNOWN_HOST (0xfff1)
-
-#define NET_HND_VAR_LEN 0x00000001
-
-static inline int net_handler_msg_len_ok(net_msg_handler *handler, u32 len)
-{
- return ((handler->flags & NET_HND_VAR_LEN) ?
- len <= handler->max_len :
- len == handler->max_len);
-}
-
-
-static inline int net_ip_version_to_family(u16 ip_version)
-{
- printk("ip_version passed: %u, host byteorder: %u\n", ip_version, ntohs(ip_version));
- return PF_INET;
- switch (ntohs(ip_version)) {
- case 4:
- return PF_INET;
- case 6:
- return PF_INET6;
- default:
- BUG();
- }
-
- return 4;
-}
-
-
-
-/* TODO: figure this out.... */
-static inline int net_link_down(int err, struct socket *sock)
-{
- if (sock) {
-/* the alternative is to #define around the members */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- if (sock->sk->state != TCP_ESTABLISHED &&
- sock->sk->state != TCP_CLOSE_WAIT)
-#else
- if (sock->sk->sk_state != TCP_ESTABLISHED &&
- sock->sk->sk_state != TCP_CLOSE_WAIT)
-#endif
- return 1;
- }
-
- if (err >= 0)
- return 0;
- switch (err) {
- /* ????????????????????????? */
- case -ERESTARTSYS:
- case -EBADF:
- /* When the server has died, an ICMP port unreachable
- * message prompts ECONNREFUSED. */
- case -ECONNREFUSED:
- case -ENOTCONN:
- case -ECONNRESET:
- case -EPIPE:
- return 1;
- }
- return 0;
-}
-
-enum {
- NET_DRIVER_UNINITED,
- NET_DRIVER_READY,
-};
-
-int net_register_handler(u32 msg_type, u32 key, int flags,
- u32 max_len, net_msg_handler_func *func, void *data);
-int net_init_tcp_sock(struct inode *inode);
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
-int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
- size_t iovlen, struct inode *inode, int *status);
-int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
-net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
-
-#define GSD_MESSAGE 130
-#define GSD_ACTION_ADD_GROUP (0x01)
-#define GSD_ACTION_ADD_GROUP_NODE (0x02)
-
-typedef struct _gsd_message
-{
- u16 from;
- u8 action;
- u8 namelen;
- u8 name[NM_MAX_NAME_LEN];
-} gsd_message;
-
-static inline void gsd_message_to_net(gsd_message *g)
-{
- g->from = htons(g->from);
-}
-static inline void gsd_message_to_host(gsd_message *g)
-{
- g->from = ntohs(g->from);
-}
-
-#endif /* CLUSTER_TCP_H */
Copied: trunk/fs/ocfs2/cluster/tcp.h (from rev 1811, trunk/cluster/tcp.h)
Deleted: trunk/fs/ocfs2/cluster/util.c
===================================================================
--- trunk/cluster/util.c 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/util.c 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,349 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * util.c
- *
- * General purpose code
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#include "warning_hack.h"
-
-#include "dlm_compat.h"
-
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-
-#include "util.h"
-
-static void util_timeout_func(unsigned long data);
-
-/* block all but 'mask' sigs, optionally saving off our previous
- * signal state. */
-void util_block_sigs(sigset_t *oldsigs, unsigned long mask)
-{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- sigset_t tmpsig;
-
- siginitsetinv(&tmpsig, mask);
- sigprocmask(SIG_BLOCK, &tmpsig, oldsigs);
-#else
-#ifdef HAVE_NPTL
- spin_lock_irq (&current->sighand->siglock);
- if (oldsigs)
- *oldsigs = current->blocked;
- siginitsetinv (&current->blocked, mask);
- recalc_sigpending ();
- spin_unlock_irq (&current->sighand->siglock);
-#else
- spin_lock_irq (&current->sigmask_lock);
- if (oldsigs)
- *oldsigs = current->blocked;
- siginitsetinv (&current->blocked, mask);
- recalc_sigpending (current);
- spin_unlock_irq (&current->sigmask_lock);
-#endif
-#endif
-}
-
-void util_unblock_sigs(sigset_t newsig)
-{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- sigprocmask(SIG_SETMASK, &newsig, NULL);
-#else
-#ifdef HAVE_NPTL
- spin_lock_irq (&current->sighand->siglock);
- current->blocked = newsig;
- recalc_sigpending ();
- spin_unlock_irq (&current->sighand->siglock);
-#else
- spin_lock_irq (&current->sigmask_lock);
- current->blocked = newsig;
- recalc_sigpending (current);
- spin_unlock_irq (&current->sigmask_lock);
-#endif
-#endif
-}
-
-/*
- * util_daemonize()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- sigset_t tmpsig;
-
- daemonize (name);
-
- if (shutdown_sigs) {
- /* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
- sigemptyset(&tmpsig);
- sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
- sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
- }
-
- return;
-} /* util_daemonize */
-#else
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- daemonize ();
- reparent_to_init ();
-
- if (len > 0) {
- if (len > 15)
- BUG();
- strncpy (current->comm, name, len);
- current->comm[len] = '\0';
- }
-
- if (shutdown_sigs)
- util_block_sigs(NULL, SHUTDOWN_SIGS);
- else
- util_block_sigs(NULL, 0);
- return;
-} /* util_daemonize */
-#endif
-
-/*
- * util_sleep()
- *
- * The interval time is in milliseconds
- *
- * This function needs to be removed.
- * Instead call schedule_timeout() directly and handle signals.
- */
-int util_sleep (__u32 ms)
-{
- __u32 numJiffies;
-
- /* 10ms = 1 jiffy, minimum resolution is one jiffy */
- numJiffies = ms * HZ / 1000;
- numJiffies = (numJiffies < 1) ? 1 : numJiffies;
-
- set_current_state (TASK_INTERRUPTIBLE);
- numJiffies = schedule_timeout (numJiffies);
-
- return 0;
-} /* util_sleep */
-
-/* prefetch has been declared to allow to build in debug mode */
-#ifdef DEBUG
-#ifndef ARCH_HAS_PREFETCH
-inline void prefetch (const void *x)
-{;
-}
-#endif
-#endif
-
-
-static void util_timeout_func(unsigned long data)
-{
- util_timeout *to = (util_timeout *)data;
-
- to->timed_out = 1;
- wake_up(&to->wait);
-}
-
-void util_init_timeout(util_timeout *to)
-{
- init_timer(&to->timer);
- to->timer.data = (unsigned long)to;
- to->timer.function = util_timeout_func;
- to->timed_out = 0;
- init_waitqueue_head(&to->wait);
-}
-
-void util_set_timeout(util_timeout *to, __u32 timeout)
-{
- __u32 how_long;
-
- if (!timeout) {
- to->timed_out = 1;
- return ;
- }
-
- how_long = (timeout * HZ / 1000);
- if (how_long < 1)
- how_long = 1;
-
- to->timer.expires = jiffies + how_long;
- add_timer(&to->timer);
-}
-
-void util_clear_timeout(util_timeout *to)
-{
- del_timer_sync(&to->timer);
-}
-
-int __util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms)
-{
- int ret;
- util_timeout timeout;
- DECLARE_WAITQUEUE(wait, current);
- DECLARE_WAITQUEUE(to_wait, current);
-
- util_init_timeout(&timeout);
-
- if (ms) {
- util_set_timeout(&timeout, ms);
- if (timeout.timed_out) {
- util_clear_timeout(&timeout);
- }
- }
- add_wait_queue(wq, &wait);
- add_wait_queue(&timeout.wait, &to_wait);
- do {
- ret = 0;
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(var)==val)
- break;
- ret = -ETIMEDOUT;
- if (timeout.timed_out)
- break;
- schedule();
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- } while (1);
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(wq, &wait);
- remove_wait_queue(&timeout.wait, &to_wait);
-
- if (ms)
- util_clear_timeout(&timeout);
-
- return ret;
-}
-
-/* resizable (using chained pages) array stuff */
-void util_init_rarray(util_rarray *arr, u16 elem_size)
-{
- arr->elements = 0;
- arr->max_elem = 0;
- arr->elem_size = elem_size;
- arr->page = NULL;
-}
-
-
-void * util_rarray_idx_to_slot(util_rarray *arr, int idx)
-{
- int pgnum, pgoff;
- util_rarray_page *pg;
-
- if (idx >= arr->max_elem) {
- printk("eek! asked for %d, but only %d elements\n",
- idx, arr->max_elem);
- return NULL;
- }
-
- pgnum = idx / UTIL_RARRAY_ELEM_PER_BUF(arr);
- pgoff = idx % UTIL_RARRAY_ELEM_PER_BUF(arr);
- pg = (util_rarray_page *)arr->page;
- while (pgnum--) {
- if (!pg->next) {
- printk("eeek! no next page!\n");
- return NULL;
- }
- pg = pg->next;
- }
- return (((char *)pg->buf) + (pgoff * arr->elem_size));
-}
-
-
-void * util_get_new_rarray_slot(util_rarray *arr, int *index)
-{
- char *tmp;
- util_rarray_page *newpg, *pg;
-
- if (arr->max_elem == arr->elements) {
- newpg = (util_rarray_page *) __get_free_page(GFP_KERNEL);
- if (!newpg) {
- printk("could not grow array!!!\n");
- return NULL;
- }
- memset(newpg, 0, PAGE_SIZE);
- if (arr->page) {
- pg = (util_rarray_page *)arr->page;
- while (pg->next)
- pg = pg->next;
- pg->next = newpg;
- } else
- arr->page = newpg;
- arr->max_elem += UTIL_RARRAY_ELEM_PER_BUF(arr);
- }
-
- tmp = util_rarray_idx_to_slot(arr, arr->elements);
- if (tmp) {
- if (index)
- *index = arr->elements;
- arr->elements++;
- }
- return tmp;
-}
-
-
-int util_add_to_rarray(util_rarray *arr, void *new)
-{
- void *slot;
- int idx;
-
- slot = util_get_new_rarray_slot(arr, &idx);
- if (slot == NULL)
- return -EINVAL;
- memcpy(slot, new, arr->elem_size);
- return idx;
-}
-
-/* resizes rarray to at least newelem elements */
-int util_resize_rarray(util_rarray *arr, int newelem)
-{
- util_rarray_page *newpg, *pg;
-
- printk("util_resize_rarray: newsize=%d, maxelem=%d\n", newelem, arr->max_elem);
- while (arr->max_elem < newelem) {
- newpg = (util_rarray_page *) __get_free_page(GFP_KERNEL);
- if (!newpg) {
- printk("could not grow array!!!\n");
- return -ENOMEM;
- }
- memset(newpg, 0, PAGE_SIZE);
- if (arr->page) {
- pg = (util_rarray_page *)arr->page;
- while (pg->next)
- pg = pg->next;
- pg->next = newpg;
- } else
- arr->page = newpg;
- arr->max_elem += UTIL_RARRAY_ELEM_PER_BUF(arr);
- }
- printk("leaving util_resize_rarray: newsize=%d, maxelem=%d\n", newelem, arr->max_elem);
-
- return 0;
-}
-
-
Copied: trunk/fs/ocfs2/cluster/util.c (from rev 1811, trunk/cluster/util.c)
Deleted: trunk/fs/ocfs2/cluster/util.h
===================================================================
--- trunk/cluster/util.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/util.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,109 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * util.h
- *
- * Function prototypes
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_UTIL_H
-#define CLUSTER_UTIL_H
-
-#ifdef __KERNEL__
-#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | \
- sigmask(SIGINT) | sigmask(SIGQUIT))
-
-/* timeout structure taken from Ben's aio.c */
-typedef struct _util_timeout {
- struct timer_list timer;
- int timed_out;
- wait_queue_head_t wait;
-} util_timeout;
-
-void util_clear_timeout(util_timeout *to);
-void util_daemonize(char *name, int len, int shutdown_sigs);
-void util_init_timeout(util_timeout *to);
-void util_set_timeout(util_timeout *to, __u32 timeout);
-void util_show_stack(unsigned long *esp);
-void util_show_trace(unsigned long *stack);
-int util_sleep(__u32 ms);
-int __util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms);
-void util_block_sigs(sigset_t *oldsigs, unsigned long mask);
-void util_unblock_sigs(sigset_t newsig);
-
-/* exits when var == val, or on timeout */
-static inline int util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int timeout)
-{
- int ret = 0;
- if (atomic_read(var) != val)
- ret = __util_wait_atomic_eq(wq, var, val, timeout);
- return ret;
-}
-
-#endif /* __KERNEL__ */
-
-/* resizable array */
-typedef struct _util_rarray
-{
- void *page;
- u16 elements;
- u16 max_elem;
- u16 elem_size;
- u16 reserved1;
-} util_rarray;
-
-#define UTIL_RARRAY_PAGE_BUF_SIZE (PAGE_SIZE - offsetof(util_rarray_page, buf))
-#define UTIL_RARRAY_ELEM_PER_BUF(r) ((UTIL_RARRAY_PAGE_BUF_SIZE) / (r)->elem_size)
-typedef struct _util_rarray_page
-{
- void *next;
- char buf[0];
-} util_rarray_page;
-
-void util_init_rarray(util_rarray *arr, u16 elem_size);
-void * util_get_new_rarray_slot(util_rarray *arr, int *index);
-int util_add_to_rarray(util_rarray *arr, void *new);
-void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
-int util_resize_rarray(util_rarray *arr, int newelem);
-
-#ifdef __KERNEL__
-typedef struct _util_thread_info
-{
- wait_queue_head_t thread_wq;
- atomic_t woken;
- struct task_struct *task;
- struct completion complete;
- int pid;
-} util_thread_info;
-
-
-static inline void util_thread_info_init(util_thread_info *info)
-{
- init_waitqueue_head(&info->thread_wq);
- atomic_set(&info->woken, 0);
- info->task = NULL;
- info->pid = -1;
- init_completion(&info->complete);
-}
-#endif /* __KERNEL__ */
-
-#endif /* CLUSTER_UTIL_H */
Copied: trunk/fs/ocfs2/cluster/util.h (from rev 1811, trunk/cluster/util.h)
Deleted: trunk/fs/ocfs2/cluster/warning_hack.h
===================================================================
--- trunk/cluster/warning_hack.h 2005-01-20 22:52:40 UTC (rev 1810)
+++ trunk/fs/ocfs2/cluster/warning_hack.h 2005-01-20 23:16:58 UTC (rev 1812)
@@ -1,39 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * warning_hack.h
- *
- * just to get rid of stupid warnings
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef WARNING_HACK_H
-#define WARNING_HACK_H
-
-struct mem_dqinfo;
-struct request;
-
-extern __inline__ int generic_fls(int x);
-extern __inline__ int get_bitmask_order(unsigned int count);
-extern inline int rq_data_dir(struct request *rq);
-
-
-#endif /* WARNING_HACK_H */
Copied: trunk/fs/ocfs2/cluster/warning_hack.h (from rev 1811, trunk/cluster/warning_hack.h)
More information about the Ocfs2-commits
mailing list