[Ocfs2-commits] khackel commits r1939 - in branches/dlm-reco-mig: .
fs/ocfs2 fs/ocfs2/cluster fs/ocfs2/dlm fs/usysfs kapi-compat/include
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Fri Mar 4 16:21:30 CST 2005
Author: khackel
Date: 2005-03-04 16:21:29 -0600 (Fri, 04 Mar 2005)
New Revision: 1939
Added:
branches/dlm-reco-mig/fs/ocfs2/aio.c
branches/dlm-reco-mig/fs/ocfs2/aio.h
branches/dlm-reco-mig/kapi-compat/include/journal_access.h
Modified:
branches/dlm-reco-mig/Config.make.in
branches/dlm-reco-mig/configure.in
branches/dlm-reco-mig/fs/ocfs2/Cscope.make
branches/dlm-reco-mig/fs/ocfs2/Makefile
branches/dlm-reco-mig/fs/ocfs2/aops.c
branches/dlm-reco-mig/fs/ocfs2/cluster/heartbeat.c
branches/dlm-reco-mig/fs/ocfs2/cluster/tcp.c
branches/dlm-reco-mig/fs/ocfs2/cluster/util.c
branches/dlm-reco-mig/fs/ocfs2/cluster/util.h
branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.c
branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.h
branches/dlm-reco-mig/fs/ocfs2/dlm/dlmthread.c
branches/dlm-reco-mig/fs/ocfs2/dlm/userdlm.c
branches/dlm-reco-mig/fs/ocfs2/dlm/util.c
branches/dlm-reco-mig/fs/ocfs2/dlm/util.h
branches/dlm-reco-mig/fs/ocfs2/dlmglue.c
branches/dlm-reco-mig/fs/ocfs2/dlmglue.h
branches/dlm-reco-mig/fs/ocfs2/file.c
branches/dlm-reco-mig/fs/ocfs2/file.h
branches/dlm-reco-mig/fs/ocfs2/heartbeat.c
branches/dlm-reco-mig/fs/ocfs2/heartbeat.h
branches/dlm-reco-mig/fs/ocfs2/journal.c
branches/dlm-reco-mig/fs/ocfs2/mmap.c
branches/dlm-reco-mig/fs/ocfs2/mmap.h
branches/dlm-reco-mig/fs/ocfs2/ocfs.h
branches/dlm-reco-mig/fs/ocfs2/proc.c
branches/dlm-reco-mig/fs/ocfs2/suballoc.c
branches/dlm-reco-mig/fs/ocfs2/super.c
branches/dlm-reco-mig/fs/ocfs2/util.c
branches/dlm-reco-mig/fs/ocfs2/util.h
branches/dlm-reco-mig/fs/ocfs2/vote.c
branches/dlm-reco-mig/fs/ocfs2/vote.h
branches/dlm-reco-mig/fs/usysfs/dir.c
branches/dlm-reco-mig/fs/usysfs/symlink.c
Log:
o Merged revision 1915:1938 from trunk
- [1938] thread cleanup
Modified: branches/dlm-reco-mig/Config.make.in
===================================================================
--- branches/dlm-reco-mig/Config.make.in 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/Config.make.in 2005-03-04 22:21:29 UTC (rev 1939)
@@ -62,6 +62,7 @@
EXTRA_CFLAGS += @KAPI_COMPAT_CFLAGS@
MISSING_SOCK_CREATE_LITE = @MISSING_SOCK_CREATE_LITE@
+JOURNAL_ACCESS_WITH_CREDITS = @JOURNAL_ACCESS_WITH_CREDITS@
COMPAT_SAFE_WRITE = @COMPAT_SAFE_WRITE@
Modified: branches/dlm-reco-mig/configure.in
===================================================================
--- branches/dlm-reco-mig/configure.in 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/configure.in 2005-03-04 22:21:29 UTC (rev 1939)
@@ -172,8 +172,10 @@
])
AC_MSG_RESULT($kernelsourcedir)
-KERNELINC="$kernelsourcedir/include"
+KERNELSRC="$kernelsourcedir"
+KERNELINC="$KERNELSRC/include"
+
if test ! -f "$KERNELINC/linux/autoconf.h"; then
AC_MSG_ERROR(No configured kernel include tree found)
fi
@@ -232,9 +234,9 @@
AC_MSG_CHECKING([for safe write ordering])
if test "x$KERNEL_26" = "xyes"; then
have_safe_write=yes
-elif egrep "EXPORT_SYMBOL.*\(generic_file_write_nolock\);" "$kernelsrc/kernel/ksyms.c" >/dev/null 2>&1; then
+elif egrep "EXPORT_SYMBOL.*\(generic_file_write_nolock\);" "$KERNELSRC/kernel/ksyms.c" >/dev/null 2>&1; then
have_safe_write=yes
-elif egrep "EXPORT_SYMBOL.*\(do_generic_file_write\);" "$kernelsrc/kernel/ksyms.c" >/dev/null 2>&1; then
+elif egrep "EXPORT_SYMBOL.*\(do_generic_file_write\);" "$KERNELSRC/kernel/ksyms.c" >/dev/null 2>&1; then
have_safe_write=yes
COMPAT_SAFE_WRITE=yes
else
@@ -263,7 +265,7 @@
fi
AC_SUBST(MODVERSIONS)
-CPPFLAGS="-I$KERNELINC/include $saved_CPPFLAGS"
+CPPFLAGS="-I$KERNELINC $saved_CPPFLAGS"
AC_MSG_CHECKING([for NPTL support])
if test "x$KERNEL_26" = "xyes"; then
@@ -328,6 +330,17 @@
AC_MSG_RESULT(no)
fi
+AC_MSG_CHECKING([for journal access functions with a credits pointer])
+KAPI_COMPAT_HEADERS="$KAPI_COMPAT_HEADERS journal_access.h"
+ JOURNAL_ACCESS_WITH_CREDITS=
+if grep "\<int \*credits);" "$KERNELINC/linux/jbd.h" >/dev/null 2>&1 ; then
+ AC_MSG_RESULT(yes)
+ JOURNAL_ACCESS_WITH_CREDITS=yes
+else
+ AC_MSG_RESULT(no)
+fi
+AC_SUBST(JOURNAL_ACCESS_WITH_CREDITS)
+
# using -include has two advantages:
# the source doesn't need to know to include compat headers
# the compat header file names don't go through the search path
Modified: branches/dlm-reco-mig/fs/ocfs2/Cscope.make
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/Cscope.make 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/Cscope.make 2005-03-04 22:21:29 UTC (rev 1939)
@@ -1,6 +1,6 @@
-CSCOPEFILES=*.c
+CSCOPEFILES=*.c *.h
CSCOPEFILES+=$(KERNELINC)/../fs/*.c $(KERNELINC)/../fs/jbd/*.c
-CSCOPEFILES+=cluster/*.c dlm/*.c
+CSCOPEFILES+=cluster/*.c dlm/*.c cluster/*.h dlm/*.h
cscope:
rm -f cscope.*
Modified: branches/dlm-reco-mig/fs/ocfs2/Makefile
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/Makefile 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/Makefile 2005-03-04 22:21:29 UTC (rev 1939)
@@ -30,6 +30,10 @@
EXTRA_CFLAGS += -DOCFS_COMPAT_SAFE_WRITE
endif
+ifdef JOURNAL_ACCESS_WITH_CREDITS
+EXTRA_CFLAGS += -DJOURNAL_ACCESS_WITH_CREDITS
+endif
+
ifneq ($(QUIET),1)
EXTRA_CFLAGS += -DVERBOSE_BH_JBD_TRACE
EXTRA_CFLAGS += -DVERBOSE_LOCKING_TRACE
@@ -51,6 +55,7 @@
SOURCES = \
24io.c \
+ aio.c \
alloc.c \
aops.c \
buffer_head_io.c \
@@ -86,6 +91,7 @@
ocfs_compat.h \
ocfs_journal.h \
buffer_head_io.h \
+ aio.h \
alloc.h \
dcache.h \
dir.h \
Copied: branches/dlm-reco-mig/fs/ocfs2/aio.c (from rev 1938, trunk/fs/ocfs2/aio.c)
Copied: branches/dlm-reco-mig/fs/ocfs2/aio.h (from rev 1938, trunk/fs/ocfs2/aio.h)
Modified: branches/dlm-reco-mig/fs/ocfs2/aops.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/aops.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/aops.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -576,8 +576,10 @@
LOG_ENTRY ();
/* blockdev_direct_IO checks alignment for us, using */
- ret = blockdev_direct_IO (rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ocfs_direct_IO_get_blocks, NULL);
-
+ ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+ inode->i_sb->s_bdev, iov, offset,
+ nr_segs, ocfs_direct_IO_get_blocks,
+ NULL);
LOG_EXIT_INT (ret);
LOG_CLEAR_CONTEXT();
Modified: branches/dlm-reco-mig/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/cluster/heartbeat.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/cluster/heartbeat.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -48,7 +48,6 @@
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/in.h>
-#include <linux/module.h>
#include <linux/linkage.h>
#include <linux/time.h>
@@ -61,6 +60,7 @@
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/bitops.h>
+#include <linux/kthread.h>
#include <asm/uaccess.h>
@@ -104,8 +104,6 @@
static LIST_HEAD(hb_net_groups);
static LIST_HEAD(hb_disk_groups);
static struct task_struct *hb_task = NULL;
-static struct completion hb_complete;
-static int hb_pid = -1;
static struct hb_callback {
struct list_head list;
@@ -369,65 +367,64 @@
return 0;
}
-
static int hb_thread(void *data)
{
- void *page;
-
- page = (void *) __get_free_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
+ void *page = data;
- util_daemonize ("hb_thread", strlen("hb_thread"), 1);
- hb_task = current;
-
- while (1) {
+ hbprintk("hb thread running\n");
+
+ while (!kthread_should_stop()) {
hb_do_disk_heartbeat(page);
- /* when we can really tear down this can wait on a wait
- * queue */
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(HB_THREAD_MS));
}
- flush_scheduled_work();
- complete (&hb_complete);
- hbprintk("quitting hb thread!!!!!!\n");
+ hbprintk("hb thread exiting\n");
+ free_page((unsigned long)page);
return 0;
}
/* Launch the hb thread for the mounted volume */
static int hb_launch_thread(void)
{
- hb_pid = -1;
- hb_task = NULL;
- init_completion (&hb_complete);
+ void *page;
+ int ret;
+ page = (void *)__get_free_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
hbprintk("starting hb thread...\n");
- hb_pid = kernel_thread (hb_thread, NULL,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (hb_pid < 0) {
- hbprintk("unable to launch hb thread, error=%d", hb_pid);
- return -EINVAL;
+ hb_task = kthread_run(hb_thread, page, "hb_thread");
+ if (IS_ERR(hb_task)) {
+ hb_task = NULL;
+ hbprintk("unable to launch hb thread, error=%ld",
+ PTR_ERR(hb_task));
+ ret = -EINVAL;
+ goto out;
}
- hbprintk("hb thread running...\n");
- return 0;
+
+ /* hb_thread is responsible for freeing the page if it runs */
+ page = NULL;
+ ret = 0;
+
+out:
+ if (page)
+ free_page((unsigned long)page);
+ return ret;
}
static void hb_complete_thread(void)
{
- hbprintk ("waiting for hb thread to exit....");
- send_sig (SIGINT, hb_task, 0);
- wait_for_completion (&hb_complete);
- hbprintk ("hb thread exited\n");
- hb_task = NULL;
+ if (hb_task) {
+ hbprintk("waiting for hb thread to exit\n");
+ kthread_stop(hb_task);
+ hb_task = NULL;
+ }
}
-
-
-
-
-
-
static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits,
u32 blocks, u64 start)
{
Modified: branches/dlm-reco-mig/fs/ocfs2/cluster/tcp.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/cluster/tcp.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/cluster/tcp.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -68,7 +68,6 @@
* - find explicit stack call to drain rx queue
* - add trivial version trading message at the start of a conn
* - go nuts adding static
- * - move recv_sock into recv_thread
* - nsc waiting is buggy, should be on socket.. wake w/err if socket dies
* - compare socks in attach_sock so both size don't close
* - implement net_remove_handlers
@@ -94,6 +93,7 @@
#include <linux/blkdev.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
+#include <linux/kthread.h>
#include <asm/uaccess.h>
@@ -145,7 +145,6 @@
#define sk_state_change state_change
#endif
-struct socket *recv_sock = NULL;
static u16 ip_version, ip_port;
static struct inode *net_inode = NULL;
static u8 net_node_num;
@@ -160,9 +159,7 @@
static spinlock_t net_active_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(net_active_list);
-static int net_recv_pid = -1;
static struct task_struct *net_recv_task = NULL;
-static struct completion net_recv_complete;
static inline void net_abort_status_return(net_status_ctxt *nsc)
{
@@ -180,11 +177,10 @@
static void __exit net_driver_exit (void);
static int net_add_handler(net_msg_handler *nmh);
static void net_remove_handlers(void);
-static int net_init_tcp_recv_sock(void);
+static struct socket *net_init_tcp_recv_sock(void);
static int net_receive_thread(void *data);
static int net_receive(void);
-static void net_try_accept(void);
-static void net_release_tcp_sock(void);
+static void net_try_accept(struct socket *sock);
static int net_process_message(struct socket *sock, net_msg *hdr);
static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
@@ -358,32 +354,47 @@
static int net_startup(void)
{
- net_recv_pid = -1;
- net_recv_task = NULL;
- init_completion (&net_recv_complete);
+ struct socket *sock;
+ int ret = 0;
+ /* if the thread was setting up the rx socket we'd like to have it
+ * communicate errors back to us here. us setting up the socket
+ * and passing it to the thread is easier */
+ sock = net_init_tcp_recv_sock();
+ if (IS_ERR(sock)) {
+ ret = PTR_ERR(sock);
+ goto out;
+ }
+
netprintk0("starting net receive thread...\n");
- net_recv_pid = kernel_thread (net_receive_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (net_recv_pid < 0) {
- netprintk("unable to launch net receive thread, error=%d\n",
- net_recv_pid);
- net_shutdown();
- return -EINVAL;
+
+ net_recv_task = kthread_run(net_receive_thread, sock, "netrecv");
+ if (IS_ERR(net_recv_task)) {
+ ret = PTR_ERR(net_recv_task);
+ net_recv_task = NULL;
+ netprintk("unable to launch net receive thread, error=%ld\n",
+ (long)ret);
+ goto out;
}
- netprintk0("net thread running...\n");
+ /* once the thread is running it has ownership of the sock */
+ sock = NULL;
+
+out:
+ if (sock)
+ sock_release(sock);
return 0;
}
static void net_shutdown(void)
{
- netprintk ("waiting for net thread to exit....\n");
- send_sig (SIGINT, net_recv_task, 0);
- wait_for_completion (&net_recv_complete);
- netprintk ("net thread exited\n");
+ if (net_recv_task) {
+ netprintk("waiting for net thread to exit....\n");
+ kthread_stop(net_recv_task);
+ }
}
-static int net_rx_should_wake(void)
+static int net_rx_should_wake(struct socket *sock)
{
int empty;
@@ -391,42 +402,26 @@
empty = list_empty(&net_active_list);
spin_unlock_bh(&net_active_lock);
- return !empty || tcp_sk(recv_sock->sk)->accept_queue;
+ return !empty || tcp_sk(sock->sk)->accept_queue;
}
static int net_receive_thread(void *data)
{
- int status;
- DECLARE_WAITQUEUE(main_wait, current);
+ struct socket *sock = data;
- util_daemonize ("netrecv", strlen("netrecv"), 1);
- net_recv_task = current;
+ netprintk0("net thread running...\n");
- status = net_init_tcp_recv_sock();
- if (status >= 0 && recv_sock) {
- add_wait_queue_exclusive(recv_sock->sk->sk_sleep, &main_wait);
- while (1) {
- net_try_accept();
- net_receive();
+ while(!kthread_should_stop()) {
+ net_try_accept(sock);
+ net_receive();
- wait_event_interruptible(*recv_sock->sk->sk_sleep,
- net_rx_should_wake());
-
- if (signal_pending(current)) {
- netprintk0("net recv thread got signal!\n");
- break;
- }
- }
- remove_wait_queue(recv_sock->sk->sk_sleep, &main_wait);
- } else {
- netprintk0("failed to initialize net_thread!\n");
+ wait_event_interruptible(*sock->sk->sk_sleep,
+ net_rx_should_wake(sock) ||
+ kthread_should_stop());
}
- /* Flush all scheduled tasks */
- flush_scheduled_work();
- net_release_tcp_sock();
- net_recv_task = NULL;
- complete (&net_recv_complete);
+ netprintk("net thread exiting\n");
+ sock_release(sock);
return 0;
}
@@ -435,7 +430,7 @@
int net_register_handler(u32 msg_type, u32 key, int flags, u32 max_len,
net_msg_handler_func *func, void *data)
{
- net_msg_handler *nmh, *found=NULL;
+ net_msg_handler *nmh;
int ret;
if (max_len > NET_MAX_PAYLOAD_BYTES) {
@@ -473,7 +468,6 @@
ret = net_add_handler(nmh);
if (ret) {
- net_put_handler(found);
netprintk("message handler for type %u, key %u already exists!!!\n",
msg_type, key);
}
@@ -817,8 +811,8 @@
if (list_empty(&net->active_item))
list_add_tail(&net->active_item, &net_active_list);
- if (recv_sock != NULL)
- wake_up(recv_sock->sk->sk_sleep);
+ if (net_recv_task)
+ wake_up_process(net_recv_task);
}
/* teardown can race with these guys and stop them in their read lock..
@@ -1452,30 +1446,31 @@
return ret;
}
-static void net_try_accept(void)
+static void net_try_accept(struct socket *sock)
{
int error, slen;
struct sockaddr_in sin;
- struct socket *sock = NULL;
+ struct socket *new_sock = NULL;
struct inode *inode = NULL;
nm_node_inode_private *priv;
- BUG_ON(recv_sock == NULL);
- error = sock_create_lite(recv_sock->sk->sk_family,
- recv_sock->sk->sk_type,
- recv_sock->sk->sk_protocol,
- &sock);
+ BUG_ON(sock == NULL);
+ error = sock_create_lite(sock->sk->sk_family,
+ sock->sk->sk_type,
+ sock->sk->sk_protocol,
+ &new_sock);
if (error)
goto out;
- sock->type = recv_sock->type;
- sock->ops = recv_sock->ops;
- error = recv_sock->ops->accept(recv_sock, sock, O_NONBLOCK);
+ new_sock->type = sock->type;
+ new_sock->ops = sock->ops;
+ error = sock->ops->accept(sock, new_sock, O_NONBLOCK);
if (error < 0)
goto out;
slen = sizeof(sin);
- error = sock->ops->getname(sock, (struct sockaddr *) &sin, &slen, 1);
+ error = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
+ &slen, 1);
if (error < 0)
goto out;
@@ -1485,7 +1480,7 @@
inode = nm_get_node_by_ip(sin.sin_addr.s_addr);
if (inode == NULL) {
netprintk0("connect from unknown host...\n");
- net_send_error(sock, NET_UNKNOWN_HOST);
+ net_send_error(new_sock, NET_UNKNOWN_HOST);
goto out;
}
@@ -1499,15 +1494,15 @@
"%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr),
ntohs(sin.sin_port));
- error = net_attach_sock(&priv->net, sock);
+ error = net_attach_sock(&priv->net, new_sock);
if (error == -EEXIST)
- net_send_error(sock, NET_ALREADY_CONNECTED);
+ net_send_error(new_sock, NET_ALREADY_CONNECTED);
out:
if (error) {
- if (sock) {
- net_sock_drain(sock);
- sock_release(sock);
+ if (new_sock) {
+ net_sock_drain(new_sock);
+ sock_release(new_sock);
}
if (inode)
iput(inode);
@@ -1515,14 +1510,15 @@
return;
}
-static int net_init_tcp_recv_sock(void)
+static struct socket *net_init_tcp_recv_sock(void)
{
struct sockaddr_in sin;
+ struct socket *sock;
int error;
error = sock_create(net_ip_version_to_family(ip_version),
SOCK_STREAM, IPPROTO_TCP,
- &recv_sock);
+ &sock);
if (error < 0) {
netprintk("unable to create socket, error=%d\n", error);
goto bail;
@@ -1533,8 +1529,8 @@
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = ip_port;
- error = recv_sock->ops->bind(recv_sock, (struct sockaddr *)&sin,
- sizeof(sin));
+ error = sock->ops->bind(sock, (struct sockaddr *)&sin,
+ sizeof(sin));
if (error < 0) {
netprintk ("unable to bind socket to port %d, error=%d\n",
ntohs(ip_port), error);
@@ -1542,24 +1538,18 @@
}
/* !!! dunno about these... */
- recv_sock->sk->sk_reuse = 1;
- error = recv_sock->ops->listen(recv_sock, 64);
+ sock->sk->sk_reuse = 1;
+ error = sock->ops->listen(sock, 64);
bail:
- if (error && recv_sock) {
- sock_release(recv_sock);
- recv_sock = NULL;
+ if (error) {
+ if (sock)
+ sock_release(sock);
+ sock = ERR_PTR(error);
}
- return error;
-} /* net_init_tcp_recv_sock */
-
-static void net_release_tcp_sock(void)
-{
- if (recv_sock) {
- sock_release(recv_sock);
- recv_sock = NULL;
- }
+ BUG_ON(sock == NULL);
+ return sock;
}
MODULE_LICENSE("GPL");
Modified: branches/dlm-reco-mig/fs/ocfs2/cluster/util.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/cluster/util.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/cluster/util.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -67,48 +67,6 @@
#endif
}
-/*
- * util_daemonize()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- sigset_t tmpsig;
-
- daemonize (name);
-
- if (shutdown_sigs) {
- /* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
- sigemptyset(&tmpsig);
- sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
- sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
- }
-
- return;
-} /* util_daemonize */
-#else
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- daemonize ();
- reparent_to_init ();
-
- if (len > 0) {
- if (len > 15)
- BUG();
- strncpy (current->comm, name, len);
- current->comm[len] = '\0';
- }
-
- if (shutdown_sigs)
- util_block_sigs(NULL, SHUTDOWN_SIGS);
- else
- util_block_sigs(NULL, 0);
- return;
-} /* util_daemonize */
-#endif
-
/* prefetch has been declared to allow to build in debug mode */
#ifdef DEBUG
#ifndef ARCH_HAS_PREFETCH
Modified: branches/dlm-reco-mig/fs/ocfs2/cluster/util.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/cluster/util.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/cluster/util.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -40,7 +40,6 @@
} util_timeout;
void util_clear_timeout(util_timeout *to);
-void util_daemonize(char *name, int len, int shutdown_sigs);
void util_init_timeout(util_timeout *to);
void util_set_timeout(util_timeout *to, __u32 timeout);
void util_show_stack(unsigned long *esp);
@@ -75,25 +74,4 @@
void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
int util_resize_rarray(util_rarray *arr, int newelem);
-#ifdef __KERNEL__
-typedef struct _util_thread_info
-{
- wait_queue_head_t thread_wq;
- atomic_t woken;
- struct task_struct *task;
- struct completion complete;
- int pid;
-} util_thread_info;
-
-
-static inline void util_thread_info_init(util_thread_info *info)
-{
- init_waitqueue_head(&info->thread_wq);
- atomic_set(&info->woken, 0);
- info->task = NULL;
- info->pid = -1;
- init_completion(&info->complete);
-}
-#endif /* __KERNEL__ */
-
#endif /* CLUSTER_UTIL_H */
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -576,8 +576,7 @@
/* if the network code had any unregister calls, they would be here. */
- if (dlm->thread.task)
- dlm_complete_thread(dlm);
+ dlm_complete_thread(dlm);
/* We've left the domain. Now we can take ourselves out of the
* list and allow the kref stuff to help us free the
@@ -650,8 +649,8 @@
INIT_LIST_HEAD(&dlm->reco.received);
INIT_LIST_HEAD(&dlm->master_list);
INIT_LIST_HEAD(&dlm->mle_hb_events);
- util_thread_info_init(&dlm->thread);
- util_thread_info_init(&dlm->reco.thread);
+ dlm->dlm_thread_task = NULL;
+ init_waitqueue_head(&dlm->dlm_thread_wq);
init_rwsem(&dlm->recovery_sem);
/* this eats the reference we got above. */
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/dlmmod.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -191,7 +191,6 @@
u8 dead_node;
u8 sending_node;
u32 next_seq;
- util_thread_info thread;
unsigned long node_map[BITS_TO_LONGS(NM_MAX_NODES)];
} dlm_recovery_ctxt;
@@ -209,7 +208,6 @@
spinlock_t spinlock;
struct rw_semaphore recovery_sem;
char *name;
- util_thread_info thread;
struct inode *group;
u32 key;
u8 group_index;
@@ -231,6 +229,8 @@
unsigned int num_joins;
struct hb_callback_func dlm_hb_up;
struct hb_callback_func dlm_hb_down;
+ struct task_struct *dlm_thread_task;
+ wait_queue_head_t dlm_thread_wq;
};
#define DLM_LOCK_RES_UNINITED 0x00000001
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/dlmthread.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/dlmthread.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/dlmthread.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -39,6 +39,7 @@
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/timer.h>
+#include <linux/kthread.h>
#include "util.h"
@@ -56,6 +57,7 @@
extern u8 dlm_global_index;
static int dlm_thread(void *data);
+struct task_struct *dlm_thread_task;
#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->group_index)
@@ -245,50 +247,54 @@
spin_unlock(&dlm->spinlock);
}
- /* wake the dlm thread */
- atomic_set(&dlm->thread.woken, 1);
- wake_up(&dlm->thread.thread_wq);
+ wake_up(&dlm->dlm_thread_wq);
}
/* Launch the NM thread for the mounted volume */
int dlm_launch_thread(dlm_ctxt *dlm)
{
dlmprintk0("starting dlm thread...\n");
- dlm->thread.pid = kernel_thread (dlm_thread, dlm,
- CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND);
- if (dlm->thread.pid < 0) {
- dlmprintk("unable to launch dlm thread, error=%d",
- dlm->thread.pid);
+
+ dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
+ if (IS_ERR(dlm->dlm_thread_task)) {
+ dlm->dlm_thread_task = NULL;
+ dlmprintk("unable to launch dlm thread, error=%ld",
+ PTR_ERR(dlm->dlm_thread_task));
return -EINVAL;
}
- dlmprintk("dlm thread running for %s...\n", dlm->name);
+
return 0;
}
void dlm_complete_thread(dlm_ctxt *dlm)
{
- dlmprintk0 ("waiting for dlm thread to exit....");
- send_sig (SIGINT, dlm->thread.task, 0);
- wait_for_completion (&dlm->thread.complete);
- dlmprintk0 ("dlm thread exited\n");
- dlm->thread.task = NULL;
+ if (dlm->dlm_thread_task) {
+ dlmprintk0("waiting for dlm thread to exit\n");
+ kthread_stop(dlm->dlm_thread_task);
+ dlm->dlm_thread_task = NULL;
+ }
}
+static int dlm_dirty_list_empty(dlm_ctxt *dlm)
+{
+ int empty;
+ spin_lock(&dlm->spinlock);
+ empty = list_empty(&dlm->dirty_list);
+ spin_unlock(&dlm->spinlock);
+
+ return empty;
+}
-
static int dlm_thread(void *data)
{
struct list_head *iter, *tmpiter;
dlm_lock_resource *res;
dlm_ctxt *dlm = data;
- util_daemonize ("dlm_thread", strlen("dlm_thread"), 1);
- dlm->thread.task = current;
+ dlmprintk("dlm thread running for %s...\n", dlm->name);
- while (1) {
- atomic_set(&dlm->thread.woken, 0);
+ while (!kthread_should_stop()) {
down_read(&dlm->recovery_sem);
spin_lock(&dlm->spinlock);
@@ -307,17 +313,11 @@
spin_unlock(&dlm->spinlock);
up_read(&dlm->recovery_sem);
- wait_event_interruptible(dlm->thread.thread_wq,
- atomic_read(&dlm->thread.woken));
-
- if (signal_pending(current)) {
- dlmprintk("DLM thread got signal while waiting\n");
- break;
- }
+ wait_event_interruptible(dlm->dlm_thread_wq,
+ !dlm_dirty_list_empty(dlm) ||
+ kthread_should_stop());
}
- flush_scheduled_work();
- complete (&dlm->thread.complete);
- dlmprintk0("quitting DLM thread!!!!!!\n");
+ dlmprintk0("quitting DLM thread\n");
return 0;
}
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/userdlm.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/userdlm.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/userdlm.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -170,9 +170,8 @@
if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
user_dlm_grab_inode_ref(lockres);
- PREPARE_WORK(&lockres->l_work,
- user_dlm_unblock_lock,
- lockres);
+ INIT_WORK(&lockres->l_work, user_dlm_unblock_lock,
+ lockres);
queue_work(user_dlm_worker, &lockres->l_work);
lockres->l_flags |= USER_LOCK_QUEUED;
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/util.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/util.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/util.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -65,48 +65,6 @@
#endif
}
-/*
- * util_daemonize()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- sigset_t tmpsig;
-
- daemonize (name);
-
- if (shutdown_sigs) {
- /* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
- sigemptyset(&tmpsig);
- sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
- sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
- }
-
- return;
-} /* util_daemonize */
-#else
-void util_daemonize (char *name, int len, int shutdown_sigs)
-{
- daemonize ();
- reparent_to_init ();
-
- if (len > 0) {
- if (len > 15)
- BUG();
- strncpy (current->comm, name, len);
- current->comm[len] = '\0';
- }
-
- if (shutdown_sigs)
- util_block_sigs(NULL, SHUTDOWN_SIGS);
- else
- util_block_sigs(NULL, 0);
- return;
-} /* util_daemonize */
-#endif
-
/* prefetch has been declared to allow to build in debug mode */
#ifdef DEBUG
#ifndef ARCH_HAS_PREFETCH
Modified: branches/dlm-reco-mig/fs/ocfs2/dlm/util.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlm/util.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlm/util.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -43,7 +43,6 @@
} util_timeout;
void util_clear_timeout(util_timeout *to);
-void util_daemonize(char *name, int len, int shutdown_sigs);
void util_init_timeout(util_timeout *to);
void util_set_timeout(util_timeout *to, __u32 timeout);
void util_show_stack(unsigned long *esp);
@@ -78,25 +77,4 @@
void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
int util_resize_rarray(util_rarray *arr, int newelem);
-#ifdef __KERNEL__
-typedef struct _util_thread_info
-{
- wait_queue_head_t thread_wq;
- atomic_t woken;
- struct task_struct *task;
- struct completion complete;
- int pid;
-} util_thread_info;
-
-
-static inline void util_thread_info_init(util_thread_info *info)
-{
- init_waitqueue_head(&info->thread_wq);
- atomic_set(&info->woken, 0);
- info->task = NULL;
- info->pid = -1;
- init_completion(&info->complete);
-}
-#endif /* __KERNEL__ */
-
#endif /* CLUSTER_UTIL_H */
Modified: branches/dlm-reco-mig/fs/ocfs2/dlmglue.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlmglue.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlmglue.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -31,6 +31,7 @@
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/crc32.h>
+#include <linux/kthread.h>
#include <cluster/util.h>
#include <cluster/clcommon.h>
@@ -174,14 +175,16 @@
static int ocfs2_cluster_lock(ocfs_super *osb,
ocfs2_lock_res *lockres,
int level,
- int lkm_flags);
+ int lkm_flags,
+ ocfs2_lock_callback cb,
+ unsigned long cb_data);
static void ocfs2_cluster_unlock(ocfs_super *osb,
ocfs2_lock_res *lockres,
int level);
static inline void ocfs2_generic_handle_downconvert_action(ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(ocfs2_lock_res *lockres);
-static void ocfs2_generic_handle_bast(ocfs2_lock_res *lockres, int level);
+static int ocfs2_generic_handle_bast(ocfs2_lock_res *lockres, int level);
static inline void ocfs2_handle_meta_convert_action(struct inode *inode,
ocfs2_lock_res *lockres);
static void ocfs2_inc_inode_seq(ocfs_super *osb,
@@ -323,6 +326,7 @@
res->l_type = type;
res->l_level = LKM_IVMODE;
INIT_LIST_HEAD(&res->l_blocked_list);
+ INIT_LIST_HEAD(&res->l_flag_cb_list);
res->l_priv = priv;
LOG_EXIT();
}
@@ -449,6 +453,37 @@
return new_level;
}
+/* XXX must be called with lockres->l_lock held */
+static void lockres_set_flags(ocfs2_lock_res *lockres, unsigned long newflags)
+{
+ struct list_head *pos, *tmp;
+ struct ocfs2_lockres_flag_callback *fcb;
+
+ lockres->l_flags = newflags;
+
+ list_for_each_safe(pos, tmp, &lockres->l_flag_cb_list) {
+ fcb = list_entry(pos, struct ocfs2_lockres_flag_callback,
+ fc_lockres_item);
+ if ((lockres->l_flags & fcb->fc_flag_mask) !=
+ fcb->fc_flag_goal)
+ continue;
+
+ list_del_init(&fcb->fc_lockres_item);
+ fcb->fc_cb(0, fcb->fc_data);
+ if (fcb->fc_free_once_called)
+ kfree(fcb);
+ }
+}
+
+static void lockres_or_flags(ocfs2_lock_res *lockres, unsigned long or)
+{
+ lockres_set_flags(lockres, lockres->l_flags | or);
+}
+static void lockres_clear_flags(ocfs2_lock_res *lockres, unsigned long clear)
+{
+ lockres_set_flags(lockres, lockres->l_flags & ~clear);
+}
+
static inline void ocfs2_generic_handle_downconvert_action(ocfs2_lock_res *lockres)
{
LOG_ENTRY();
@@ -462,9 +497,9 @@
if (lockres->l_level <=
ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
lockres->l_blocking = LKM_NLMODE;
- lockres->l_flags &= ~OCFS2_LOCK_BLOCKED;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
}
- lockres->l_flags &= ~OCFS2_LOCK_BUSY;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
LOG_EXIT();
}
@@ -511,10 +546,10 @@
* *anything* however should mark ourselves as needing an
* update */
if (lockres->l_level == LKM_NLMODE)
- lockres->l_flags |= OCFS2_LOCK_NEEDS_REFRESH;
+ lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
- lockres->l_flags &= ~OCFS2_LOCK_BUSY;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
LOG_EXIT();
}
@@ -542,11 +577,11 @@
if (lockres->l_requested > LKM_NLMODE &&
!(lockres->l_flags & OCFS2_LOCK_LOCAL))
- lockres->l_flags |= OCFS2_LOCK_NEEDS_REFRESH;
+ lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
- lockres->l_flags |= OCFS2_LOCK_ATTACHED;
- lockres->l_flags &= ~OCFS2_LOCK_BUSY;
+ lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
+ lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
LOG_EXIT();
}
@@ -570,6 +605,7 @@
OCFS_ASSERT(ocfs2_is_inode_lock(lockres));
spin_lock(&lockres->l_lock);
+
lksb = &(lockres->l_lksb);
if (lksb->status != DLM_NORMAL) {
LOG_ERROR_ARGS("ocfs2_inode_ast_func: lksb status value of %u "
@@ -604,7 +640,7 @@
/* data locking ignores refresh flag for now. */
if (lockres->l_type == OCFS_TYPE_DATA)
- lockres->l_flags &= ~OCFS2_LOCK_NEEDS_REFRESH;
+ lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
/* set it to something invalid so if we get called again we
* can catch it. */
@@ -615,17 +651,29 @@
LOG_EXIT();
}
-static void ocfs2_generic_handle_bast(ocfs2_lock_res *lockres, int level)
+static int ocfs2_generic_handle_bast(ocfs2_lock_res *lockres, int level)
{
+ int needs_downconvert = 0;
LOG_ENTRY();
- spin_lock(&lockres->l_lock);
+ assert_spin_locked(&lockres->l_lock);
+
lockres->l_flags |= OCFS2_LOCK_BLOCKED;
- if (level > lockres->l_blocking)
+
+ if (level > lockres->l_blocking) {
+ /* only schedule a downconvert if we haven't already scheduled
+ * one that goes low enough to satisfy the level we're
+ * blocking. this also catches the case where we get
+ * duplicate BASTs */
+ if (ocfs2_highest_compat_lock_level(level) <
+ ocfs2_highest_compat_lock_level(lockres->l_blocking))
+ needs_downconvert = 1;
+
lockres->l_blocking = level;
- spin_unlock(&lockres->l_lock);
+ }
- LOG_EXIT();
+ LOG_EXIT_STATUS(needs_downconvert);
+ return needs_downconvert;
}
static void ocfs2_inode_bast_func(void *opaque, int level)
@@ -633,6 +681,7 @@
ocfs2_lock_res *lockres = opaque;
struct inode *inode;
ocfs_super *osb;
+ int needs_downconvert;
LOG_ENTRY();
@@ -646,13 +695,18 @@
OCFS_ASSERT(level > LKM_NLMODE);
- ocfs2_generic_handle_bast(lockres, level);
+ spin_lock(&lockres->l_lock);
- ocfs2_schedule_blocked_inode_lock(inode, lockres);
+ needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
+ if (needs_downconvert)
+ ocfs2_schedule_blocked_inode_lock(inode, lockres);
+
+ spin_unlock(&lockres->l_lock);
+
ocfs2_kick_vote_thread(osb);
-
/* TODO: Is a wake_up call here really necessary? */
wake_up(&lockres->l_event);
+
LOG_EXIT();
}
@@ -701,15 +755,19 @@
{
ocfs2_lock_res *lockres = opaque;
ocfs_super *osb;
+ int needs_downconvert;
LOG_ENTRY();
osb = ocfs2_lock_res_super(lockres);
dprintk("Superblock BAST fired\n");
- ocfs2_generic_handle_bast(lockres, level);
+ spin_lock(&lockres->l_lock);
+ needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
+ if (needs_downconvert)
+ ocfs2_schedule_blocked_lock(osb, lockres);
+ spin_unlock(&lockres->l_lock);
- ocfs2_schedule_blocked_lock(osb, lockres);
ocfs2_kick_vote_thread(osb);
wake_up(&lockres->l_event);
@@ -721,7 +779,7 @@
{
LOG_ENTRY();
spin_lock(&lockres->l_lock);
- lockres->l_flags &= ~OCFS2_LOCK_BUSY;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
if (convert)
lockres->l_action = OCFS2_AST_INVALID;
else
@@ -751,7 +809,7 @@
lockres->l_action = OCFS2_AST_ATTACH;
lockres->l_requested = level;
- lockres->l_flags |= OCFS2_LOCK_BUSY;
+ lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
spin_unlock(&lockres->l_lock);
status = dlmlock(osb->dlm,
@@ -814,6 +872,18 @@
LOG_EXIT();
}
+static void lockres_add_flag_callback(ocfs2_lock_res *lockres,
+ struct ocfs2_lockres_flag_callback *fcb,
+ unsigned long mask, unsigned long goal)
+{
+ BUG_ON(!list_empty(&fcb->fc_lockres_item));
+ BUG_ON(fcb->fc_cb == NULL);
+
+ list_add_tail(&fcb->fc_lockres_item, &lockres->l_flag_cb_list);
+ fcb->fc_flag_mask = mask;
+ fcb->fc_flag_goal = goal;
+}
+
/* predict what lock level we'll be dropping down to on behalf
* of another node, and return true if the currently wanted
* level will be compatible with it. */
@@ -825,21 +895,58 @@
return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
}
+/* these are generic and could be used elsewhere */
+struct ocfs2_status_completion {
+ int sc_status;
+ struct completion sc_complete;
+};
+
+static void ocfs2_status_completion_cb(int rc, unsigned long data)
+{
+ struct ocfs2_status_completion *sc;
+
+ sc = (struct ocfs2_status_completion *)data;
+ sc->sc_status = rc;
+ complete(&sc->sc_complete);
+}
+
static int ocfs2_cluster_lock(ocfs_super *osb,
ocfs2_lock_res *lockres,
int level,
- int lkm_flags)
+ int lkm_flags,
+ ocfs2_lock_callback cb,
+ unsigned long cb_data)
{
+ struct ocfs2_lockres_flag_callback _fcb, *fcb = &_fcb;
+ struct ocfs2_status_completion sc;
+ dlm_status status;
int ret;
int catch_signals = 1;
- dlm_status status;
LOG_ENTRY();
+ if (cb != NULL) {
+ fcb = kmalloc(sizeof(*fcb), GFP_NOFS);
+ if (fcb == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fcb->fc_cb = cb;
+ fcb->fc_data = cb_data;
+ fcb->fc_free_once_called = 1;
+ } else {
+ init_completion(&sc.sc_complete);
+ fcb->fc_cb = ocfs2_status_completion_cb;
+ fcb->fc_data = (unsigned long)≻
+ fcb->fc_free_once_called = 0;
+ }
+
+ INIT_LIST_HEAD(&fcb->fc_lockres_item);
+
again:
if (catch_signals && signal_pending(current)) {
ret = -EINTR;
- goto bail;
+ goto out;
}
spin_lock(&lockres->l_lock);
@@ -851,10 +958,9 @@
level > lockres->l_level) {
/* is someone sitting in dlm_lock? If so, wait on
* them. */
- spin_unlock(&lockres->l_lock);
-
- ocfs2_wait_on_busy_lock(lockres);
- goto again;
+ lockres_add_flag_callback(lockres, fcb, OCFS2_LOCK_BUSY, 0);
+ ret = -EIOCBQUEUED;
+ goto unlock;
}
if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
@@ -863,7 +969,7 @@
ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
if (ret < 0) {
LOG_ERROR_STATUS(ret);
- goto bail;
+ goto out;
}
goto again;
}
@@ -872,10 +978,9 @@
!ocfs2_may_continue_on_blocked_lock(lockres, level)) {
/* is the lock is currently blocked on behalf of
* another node */
- spin_unlock(&lockres->l_lock);
-
- ocfs2_wait_on_blocked_lock(lockres);
- goto again;
+ lockres_add_flag_callback(lockres, fcb, OCFS2_LOCK_BLOCKED, 0);
+ ret = -EIOCBQUEUED;
+ goto unlock;
}
if (level > lockres->l_level) {
@@ -908,27 +1013,37 @@
ret = -ENOENT;
}
ocfs2_recover_from_dlm_error(lockres, 1);
- goto bail;
+ goto out;
}
dprintk("lock %s, successfull return from dlmlock\n",
lockres->l_name);
- ocfs2_wait_on_busy_lock(lockres);
-
/* At this point we've gone inside the dlm and need to
* complete our work regardless. */
catch_signals = 0;
+
+ /* wait for busy to clear and carry on */
goto again;
}
/* Ok, if we get here then we're good to go. */
ocfs2_inc_holders(lockres, level);
+ ret = 0;
+unlock:
spin_unlock(&lockres->l_lock);
+out:
+ if (ret == -EIOCBQUEUED && fcb->fc_cb == ocfs2_status_completion_cb) {
+ wait_for_completion(&sc.sc_complete);
+ ret = sc.sc_status;
+ if (ret == 0)
+ goto again;
+ }
- ret = 0;
-bail:
+ if (ret && fcb != NULL && fcb != &_fcb)
+ kfree(fcb);
+
LOG_EXIT_STATUS(ret);
return ret;
}
@@ -972,10 +1087,10 @@
lockres = &OCFS_I(inode)->ip_meta_lockres;
OCFS_ASSERT(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
- lockres->l_flags |= OCFS2_LOCK_LOCAL;
+ lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
status = ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL);
- lockres->l_flags &= ~OCFS2_LOCK_LOCAL;
+ lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -983,10 +1098,10 @@
lockres = &OCFS_I(inode)->ip_data_lockres;
OCFS_ASSERT(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
- lockres->l_flags |= OCFS2_LOCK_LOCAL;
+ lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
status = ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL);
- lockres->l_flags &= ~OCFS2_LOCK_LOCAL;
+ lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1015,7 +1130,8 @@
level = write ? LKM_EXMODE : LKM_PRMODE;
- status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0);
+ status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
+ NULL, 0);
if (status < 0 && status != -EINTR)
LOG_ERROR_STATUS(status);
@@ -1209,7 +1325,7 @@
}
/* Ok, I'll be the one to refresh this lock. */
- lockres->l_flags |= OCFS2_LOCK_REFRESHING;
+ lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
spin_unlock(&lockres->l_lock);
status = 1;
@@ -1226,9 +1342,9 @@
LOG_ENTRY();
spin_lock(&lockres->l_lock);
- lockres->l_flags &= ~OCFS2_LOCK_REFRESHING;
+ lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
if (!status)
- lockres->l_flags &= ~OCFS2_LOCK_NEEDS_REFRESH;
+ lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
spin_unlock(&lockres->l_lock);
wake_up(&lockres->l_event);
@@ -1298,22 +1414,24 @@
return status;
}
-int ocfs2_meta_lock_flags(struct inode *inode,
- ocfs_journal_handle *handle,
- struct buffer_head **ret_bh,
- int ex,
- int flags)
+/*
+ * returns < 0 error if the callback will never be called, otherwise
+ * the result of the lock will be communicated via the callback.
+ */
+int ocfs2_meta_lock_flags_async(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ int flags,
+ ocfs2_lock_callback cb,
+ unsigned long cb_data)
{
int status, level, dlm_flags;
ocfs2_lock_res *lockres;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
- struct buffer_head *bh = NULL;
+ struct buffer_head *local_bh = NULL;
OCFS_ASSERT(inode);
- if (handle && !ex)
- BUG();
-
LOG_ENTRY();
dprintk("inode %llu, take %s META lock\n", OCFS_I(inode)->ip_blkno,
@@ -1333,7 +1451,8 @@
if (flags & OCFS2_META_LOCK_NOQUEUE)
dlm_flags |= LKM_NOQUEUE;
- status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags);
+ status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, cb,
+ cb_data);
if (status < 0) {
if (status != -EINTR && status != -EAGAIN)
LOG_ERROR_STATUS(status);
@@ -1349,15 +1468,46 @@
ocfs_node_map_is_empty(osb,
&osb->recovery_map));
- status = ocfs2_meta_lock_update(inode, &bh);
+ /* it's pretty weak to do this possibly sync read here, but until
+ * we have a real async version of it it's as good a place as any */
+ if (ret_bh == NULL)
+ ret_bh = &local_bh;
+ status = ocfs2_meta_lock_update(inode, ret_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+bail:
+ if (local_bh)
+ brelse(local_bh);
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/* grabs the meta lock synchronously. */
+int ocfs2_meta_lock_flags(struct inode *inode,
+ ocfs_journal_handle *handle,
+ struct buffer_head **ret_bh,
+ int ex,
+ int flags)
+{
+ struct buffer_head *bh = NULL;
+ int status;
+
+ LOG_ENTRY();
+
+ BUG_ON(handle && !ex);
+
+ status = ocfs2_meta_lock_flags_async(inode, ret_bh, ex, flags,
+ NULL, 0);
+ if (status)
+ goto bail;
+
if (ret_bh && !bh) {
/* caller wants a buffer head but we haven't read it yet. */
- status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &bh,
+ status = ocfs_read_block(OCFS2_SB(inode->i_sb),
+ OCFS_I(inode)->ip_blkno, &bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1407,7 +1557,7 @@
LOG_ENTRY();
- status = ocfs2_cluster_lock(osb, lockres, level, 0);
+ status = ocfs2_cluster_lock(osb, lockres, level, 0, NULL, 0);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS(status);
@@ -1449,22 +1599,21 @@
int ocfs2_dlm_init(ocfs_super *osb)
{
- int status, pid;
+ int status;
u32 dlm_key;
dlm_ctxt *dlm = NULL;
LOG_ENTRY();
/* launch vote thread */
- init_completion (&osb->vote_event_init);
- pid = kernel_thread(ocfs2_vote_thread, osb,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (pid < 0) {
- status = pid;
+ osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote-%d",
+ osb->osb_id);
+ if (IS_ERR(osb->vote_task)) {
+ status = PTR_ERR(osb->vote_task);
+ osb->vote_task = NULL;
LOG_ERROR_STATUS(status);
goto bail;
}
- wait_for_completion(&osb->vote_event_init);
/* used by the dlm code to make message headers unique, each
* node in this domain must agree on this. */
@@ -1492,24 +1641,14 @@
void ocfs2_dlm_shutdown(ocfs_super *osb)
{
- int wait_on_vote_task = 0;
-
LOG_ENTRY();
ocfs2_drop_super_lock(osb);
- /* needs to be able to deal with the dlm being in many
- * different states. */
- spin_lock(&osb->vote_task_lock);
if (osb->vote_task) {
- osb->vote_exit = 1;
- ocfs2_kick_vote_thread(osb);
- wait_on_vote_task = 1;
+ kthread_stop(osb->vote_task);
+ osb->vote_task = NULL;
}
- spin_unlock(&osb->vote_task_lock);
- if (wait_on_vote_task)
- wait_for_completion(&osb->vote_event_complete);
-
ocfs2_lock_res_free(&osb->super_lockres);
dlm_unregister_domain(osb->dlm);
LOG_EXIT();
@@ -1539,7 +1678,7 @@
BUG();
}
lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
- lockres->l_flags &= ~OCFS2_LOCK_BUSY;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
spin_unlock(&lockres->l_lock);
wake_up(&lockres->l_event);
@@ -1566,14 +1705,14 @@
goto bail;
}
- lockres->l_flags &= ~OCFS2_LOCK_ATTACHED;
+ lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
/* make sure we never get here while waiting for an ast to
* fire. */
OCFS_ASSERT(lockres->l_action == OCFS2_AST_INVALID);
/* is this necessary? */
- lockres->l_flags |= OCFS2_LOCK_BUSY;
+ lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
spin_unlock(&lockres->l_lock);
@@ -1676,7 +1815,7 @@
lockres->l_action = OCFS2_AST_DOWNCONVERT;
lockres->l_requested = new_level;
- lockres->l_flags |= OCFS2_LOCK_BUSY;
+ lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
spin_unlock(&lockres->l_lock);
if (lvb)
Modified: branches/dlm-reco-mig/fs/ocfs2/dlmglue.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/dlmglue.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/dlmglue.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -103,6 +103,12 @@
struct buffer_head **ret_bh,
int ex,
int flags);
+int ocfs2_meta_lock_flags_async(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ int flags,
+ ocfs2_lock_callback cb,
+ unsigned long cb_data);
void ocfs2_meta_unlock(struct inode *inode,
int ex);
int ocfs2_super_lock(ocfs_super *osb,
Modified: branches/dlm-reco-mig/fs/ocfs2/file.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/file.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/file.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -38,6 +38,7 @@
#include "ocfs.h"
#include "ocfs2.h"
+#include "aio.h"
#include "alloc.h"
#include "dir.h"
#include "dlmglue.h"
@@ -174,6 +175,39 @@
return (err < 0) ? -EIO : 0;
} /* ocfs_sync_file */
+static void ocfs2_update_inode_size(struct inode *inode,
+ u64 new_size)
+{
+ struct super_block *sb = inode->i_sb;
+
+ i_size_write(inode, new_size);
+ inode->i_blocks = (new_size + sb->s_blocksize - 1) >>
+ sb->s_blocksize_bits;
+}
+
+void ocfs2_file_finish_extension(struct inode *inode, loff_t newsize,
+ unsigned should_zero)
+{
+ LOG_TRACE_STR("Generic_file_write ok, asking for OIN update now");
+ ocfs2_update_inode_size(inode, newsize);
+
+ if (!should_zero) {
+ /*
+ * This leaves dirty data in holes.
+ * Caveat Emptor.
+ */
+ OCFS_I(inode)->ip_mmu_private = newsize;
+ } else {
+ int status = ocfs2_zero_extend(inode);
+ /*
+ * Don't overwrite the result of
+ * generic_file_write
+ */
+ if (status)
+ LOG_ERROR_ARGS("Unable to pre-zero extension of inode (%d)", status);
+ }
+}
+
/*
* ocfs_file_write()
* Linux 2.6 TODO: Remove all O_DIRECT conditionals here, they are no longer
@@ -183,18 +217,11 @@
size_t count, loff_t *ppos)
{
int ret = 0;
- int extended = 0;
ocfs_super *osb = NULL;
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
- int status;
- u64 newsize;
- struct super_block *sb = inode->i_sb;
- int do_direct_io = 0;
int sector_size;
- int have_i_sem = 0;
- int level = filp->f_flags & O_APPEND;
- loff_t saved_ppos;
+ struct ocfs2_write_lock_info info = {0, };
DECLARE_BUFFER_LOCK_CTXT(ctxt);
LOG_SET_CONTEXT(WRITE);
@@ -219,137 +246,11 @@
osb = OCFS_SB(inode->i_sb);
sector_size = 1 << osb->s_sectsize_bits;
- down(&inode->i_sem);
- have_i_sem = 1;
-
- ret = ocfs2_setup_io_locks(inode->i_sb, inode, (char *) buf,
- count, &ctxt);
- if (ret < 0) {
- LOG_ERROR_STATUS(ret);
+ ret = ocfs_write_lock_maybe_extend(filp, buf, count, ppos, &info,
+ &ctxt);
+ if (ret)
goto bail;
- }
- /* This will lock everyone in the context who's order puts
- * them before us. */
- ret = ocfs2_lock_buffer_inodes(&ctxt, inode);
- if (ret < 0) {
- if (ret != -EINTR)
- LOG_ERROR_STATUS(ret);
- goto bail;
- }
-
- ctxt.b_lock_data_write = 1;
-lock:
- status = ocfs2_meta_lock(inode, NULL, NULL, level);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- ret = status;
- goto bail;
- }
- /* to handle extending writes, we do a bit of our own locking
- * here, but we setup the ctxt do unlock for us (as well as
- * handle locking everything else. */
- if (level)
- ctxt.b_lock_meta_write = 1;
-
- /* work on a copy of ppos until we're sure that we won't have
- * to recalculate it due to relocking. */
- saved_ppos = *ppos;
-
- if (filp->f_flags & O_APPEND) {
- saved_ppos = i_size_read(inode);
- LOG_TRACE_ARGS("O_APPEND: inode->i_size=%llu\n", saved_ppos);
-
- /* ugh, work around some applications which open
- * everything O_DIRECT + O_APPEND and really don't
- * mean to use O_DIRECT. */
-#warning this is wrong wrong wrong
- filp->f_flags &= ~O_DIRECT;
- }
-
- if (filp->f_flags & O_DIRECT) {
- /* anything special for o_direct? */
- LOG_TRACE_STR ("O_DIRECT");
- if ((saved_ppos & (sector_size - 1)) ||
- (count & (sector_size - 1)) ||
- ((unsigned long)buf & (sector_size - 1))) {
- do_direct_io = 0;
- filp->f_flags |= O_SYNC;
- } else {
- do_direct_io = 1;
- }
- }
- ctxt.b_lock_direct = do_direct_io;
-
- newsize = count + saved_ppos;
- if (filp->f_flags & O_APPEND)
- newsize = count + i_size_read(inode);
-
- LOG_TRACE_ARGS ("ppos=%llu newsize=%llu cursize=%llu\n",
- saved_ppos, newsize, i_size_read(inode));
-
- if (newsize > i_size_read(inode)) {
- if (!level) {
- /* we want an extend, but need a higher
- * level cluster lock. */
- LOG_TRACE_ARGS("inode %llu, had a PR, looping back "
- "for EX\n", OCFS_I(inode)->ip_blkno);
- ocfs2_meta_unlock(inode, level);
- level = 1;
- goto lock;
- }
- extended = 1;
-
- LOG_TRACE_ARGS("Writing at EOF, will need more allocation: "
- "i_size=%llu, need=%llu\n",
- i_size_read(inode), newsize);
-
- status = ocfs_extend_file(osb, inode, newsize);
- if (status < 0) {
- if (status != -EINTR && status != -ENOSPC) {
- LOG_ERROR_STATUS (status);
- LOG_ERROR_ARGS ("Failed to extend file from "
- "%llu to %llu",
- *ppos, newsize);
- ret = -ENOSPC;
- } else
- ret = status;
-
- ocfs2_meta_unlock(inode, level);
- goto bail_unlock;
- }
- }
-
- /* we've got whatever cluster lock is appropriate now, so we
- * can stuff *ppos back. */
- *ppos = saved_ppos;
-
- if (!do_direct_io) {
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- ret = status;
-
- ocfs2_meta_unlock(inode, level);
- goto bail_unlock;
- }
- }
-
- /* Alright, fool the io locking stuff into thinking it's
- * handled our inode for us. We can now count on it to do the
- * unlock for us. */
- ctxt.b_target->ba_locked = 1;
-
- /* This will lock everyone who's order puts them *after* our inode. */
- ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);
- if (ret < 0) {
- if (ret != -EINTR)
- LOG_ERROR_STATUS(ret);
- goto bail_unlock;
- }
-
down_read(&OCFS_I(inode)->ip_alloc_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
@@ -363,7 +264,7 @@
/* ick. seems to be our only way of toggling
* directio for 2.6 */
unsigned int savedflags = filp->f_flags;
- if (do_direct_io)
+ if (info.wl_do_direct_io)
filp->f_flags |= O_DIRECT;
else
filp->f_flags &= ~O_DIRECT;
@@ -371,46 +272,28 @@
filp->f_flags = savedflags;
}
#else
- if (do_direct_io)
+ if (info.wl_do_direct_io)
ret = ocfs_rw_direct (WRITE, filp, (char *) buf, count, ppos);
else
ret = generic_file_write_nolock (filp, buf, count, ppos);
#endif
up_read(&OCFS_I(inode)->ip_alloc_sem);
- if (extended) {
- LOG_TRACE_STR
- ("Generic_file_write ok, asking for OIN update now");
- i_size_write(inode, newsize);
- inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
- if (do_direct_io) {
- /*
- * This leaves dirty data in holes.
- * Caveat Emptor.
- */
- OCFS_I(inode)->ip_mmu_private = i_size_read(inode);
- } else {
- status = ocfs2_zero_extend(inode);
- /*
- * Don't overwrite the result of
- * generic_file_write
- */
- if (status)
- LOG_ERROR_ARGS("Unable to pre-zero extension of inode (%d)", status);
- }
- }
-
-bail_unlock:
- ocfs2_unlock_buffer_inodes(&ctxt);
-
bail:
- if (have_i_sem)
+ /* we might have to finish up extensions that were performed before
+ * an error was returned by, say, data locking */
+ if (info.wl_extended)
+ ocfs2_file_finish_extension(inode, info.wl_newsize,
+ !info.wl_do_direct_io);
+ if (info.wl_unlock_ctxt)
+ ocfs2_unlock_buffer_inodes(&ctxt);
+ if (info.wl_have_i_sem)
up(&inode->i_sem);
LOG_EXIT_INT (ret);
LOG_CLEAR_CONTEXT();
return ret;
-} /* ocfs_file_write */
+}
/*
* ocfs_file_read()
@@ -424,6 +307,7 @@
ocfs_super *osb = NULL;
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
+ ocfs2_backing_inode *target_binode;
int status = 0;
int do_direct_io = 0;
int sector_size;
@@ -454,14 +338,16 @@
do_direct_io = 1;
}
}
- ctxt.b_lock_direct = do_direct_io;
- ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt);
+ ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt,
+ &target_binode);
if (ret < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+ target_binode->ba_lock_data = do_direct_io ? 0 : 1;
+
ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);
if (ret < 0) {
if (ret != -EINTR)
@@ -516,8 +402,8 @@
.open = ocfs_file_open,
.ioctl = ocfs_ioctl,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .aio_read = ocfs2_file_aio_read,
+ .aio_write = ocfs2_file_aio_write,
#else
.aio_read = ocfs_aio_read,
.aio_write = ocfs_aio_write,
@@ -867,10 +753,15 @@
*
* Ok, this function is heavy on the goto's - we need to clean it up a
* bit.
+ *
+ * *bytes_extended is a measure of how much was added to
+ * dinode->i_size, NOT how much allocated was actually added to the
+ * file. It will always be correct, even when we return an error.
*/
int ocfs_extend_file(ocfs_super *osb,
- struct inode *inode,
- u64 new_i_size)
+ struct inode *inode,
+ u64 new_i_size,
+ u64 *bytes_extended)
{
int status = 0;
int restart_func = 0;
@@ -879,6 +770,7 @@
int credits, num_free_extents;
unsigned int overalloc_bits = 0;
u32 clusters_to_add;
+ u64 new_fe_size;
struct buffer_head *bh = NULL;
ocfs2_dinode *fe;
ocfs_journal_handle *handle = NULL;
@@ -888,6 +780,8 @@
LOG_ENTRY_ARGS("(new_i_size=%llu)\n", new_i_size);
+ *bytes_extended = 0;
+
/* setattr sometimes calls us like this. */
if (new_i_size == 0)
goto leave;
@@ -908,7 +802,7 @@
fe = (ocfs2_dinode *) bh->b_data;
OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
- OCFS_ASSERT(i_size_read(inode) == fe->i_size);
+ OCFS_ASSERT(i_size_read(inode) == (fe->i_size - *bytes_extended));
OCFS_ASSERT(new_i_size >= i_size_read(inode));
if (i_size_read(inode) == new_i_size)
@@ -1019,11 +913,13 @@
} else {
OCFS_ASSERT(why == RESTART_TRANS);
+ new_fe_size = ocfs2_clusters_to_bytes(osb->sb,
+ fe->i_clusters);
+ *bytes_extended += new_fe_size - fe->i_size;
/* update i_size in case we crash after the
* extend_trans */
- fe->i_size =
- ocfs2_clusters_to_bytes(osb->sb,
- fe->i_clusters);
+ fe->i_size = new_fe_size;
+
fe->i_mtime = OCFS_CURRENT_TIME;
status = ocfs_journal_dirty(handle, bh);
@@ -1056,11 +952,13 @@
no_alloc:
/* this may not be the end of our allocation so only update
* i_size to what's appropriate. */
- if (new_i_size > ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters))
- fe->i_size = ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters);
- else
- fe->i_size = new_i_size;
+ new_fe_size = ocfs2_clusters_to_bytes(osb->sb, fe->i_clusters);
+ if (new_i_size < new_fe_size)
+ new_fe_size = new_i_size;
+ *bytes_extended += new_fe_size - fe->i_size;
+ fe->i_size = new_fe_size;
+
LOG_TRACE_ARGS("fe: i_clusters = %u, i_size=%llu\n",
fe->i_clusters, fe->i_size);
@@ -1100,6 +998,7 @@
restart_func = 0;
goto restart_all;
}
+
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_extend_file */
@@ -1112,7 +1011,7 @@
{
int status = 0;
int unlock = 0;
- u64 newsize;
+ u64 newsize, bytes_added;
struct inode *inode = dentry->d_inode;
struct super_block *sb = inode->i_sb;
ocfs_super *osb = OCFS2_SB(sb);
@@ -1156,19 +1055,32 @@
if (attr->ia_valid & ATTR_SIZE &&
newsize != i_size_read(inode)) {
+ bytes_added = 0;
+
if (i_size_read(inode) > newsize)
status = ocfs_truncate_file(osb, newsize, inode);
else
- status = ocfs_extend_file(osb, inode, newsize);
- if (status < 0) {
+ status = ocfs_extend_file(osb, inode, newsize,
+ &bytes_added);
+ if (status < 0 && (!bytes_added)) {
if (status != -EINTR && status != -ENOSPC)
LOG_ERROR_STATUS(status);
status = -ENOSPC;
goto bail;
}
+
+ /* partial extend, we continue with what we've got. */
+ if (status < 0 && status != -ENOSPC && status != -EINTR)
+ LOG_ERROR_ARGS("status return of %d extending inode "
+ "%llu\n", status,
+ OCFS_I(inode)->ip_blkno);
+ status = 0;
+
+ newsize = bytes_added + i_size_read(inode);
+ if (bytes_added)
+ ocfs2_update_inode_size(inode, newsize);
+
spin_lock(&OCFS_I(inode)->ip_lock);
- i_size_write(inode, newsize);
- inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
if (OCFS_I(inode)->ip_flags & OCFS_INODE_OPEN_DIRECT) {
/* This is a total broken hack for O_DIRECT crack */
OCFS_I(inode)->ip_mmu_private = i_size_read(inode);
Modified: branches/dlm-reco-mig/fs/ocfs2/file.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/file.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/file.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -43,11 +43,12 @@
struct _ocfs2_alloc_context *data_ac,
struct _ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason);
-int ocfs_extend_file(ocfs_super *osb,
- struct inode *inode,
- u64 new_i_size);
int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs_sync_inode(struct inode *inode);
+int ocfs_extend_file(ocfs_super *osb,
+ struct inode *inode,
+ u64 new_i_size,
+ u64 *bytes_extended);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
int ocfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -59,4 +60,7 @@
struct buffer_head *fe_bh,
u64 new_i_size);
+void ocfs2_file_finish_extension(struct inode *inode, loff_t newsize,
+ unsigned should_zero);
+
#endif /* OCFS2_FILE_H */
Modified: branches/dlm-reco-mig/fs/ocfs2/heartbeat.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/heartbeat.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/heartbeat.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -62,25 +62,21 @@
int node_num,
void *data);
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
- ocfs_node_map *from);
static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
int bit);
static inline void __ocfs_node_map_clear_bit(ocfs_node_map *map,
int bit);
static inline int __ocfs_node_map_is_empty(ocfs_node_map *map);
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
+static void __ocfs_node_map_dup(ocfs_node_map *target,
ocfs_node_map *from);
static void __ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
void ocfs2_init_node_maps(ocfs_super *osb)
{
spin_lock_init(&osb->node_map_lock);
- ocfs_node_map_init(osb, &osb->mounted_map);
- ocfs_node_map_init(osb, &osb->recovery_map);
- ocfs_node_map_init(osb, &osb->umount_map);
+ ocfs_node_map_init(&osb->mounted_map);
+ ocfs_node_map_init(&osb->recovery_map);
+ ocfs_node_map_init(&osb->umount_map);
}
static void ocfs2_hb_node_down_cb(struct inode *group,
@@ -163,10 +159,9 @@
/* special case -1 for now
* TODO: should *really* make sure the calling func never passes -1!! */
-void ocfs_node_map_init(ocfs_super *osb,
- ocfs_node_map *map)
+void ocfs_node_map_init(ocfs_node_map *map)
{
- map->num_nodes = osb->max_nodes;
+ map->num_nodes = OCFS_NODE_MAP_MAX_NODES;
memset(map->map, 0, BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) *
sizeof(unsigned long));
}
@@ -242,12 +237,11 @@
return ret;
}
-static void __ocfs_node_map_dup(ocfs_super *osb,
- ocfs_node_map *target,
+static void __ocfs_node_map_dup(ocfs_node_map *target,
ocfs_node_map *from)
{
OCFS_ASSERT(from->num_nodes > 0);
- ocfs_node_map_init(osb, target);
+ ocfs_node_map_init(target);
__ocfs_node_map_set(target, from);
}
@@ -260,7 +254,7 @@
int ret;
spin_lock(&osb->node_map_lock);
- __ocfs_node_map_dup(osb, &temp, target);
+ __ocfs_node_map_dup(&temp, target);
__ocfs_node_map_clear_bit(&temp, bit);
ret = __ocfs_node_map_is_empty(&temp);
spin_unlock(&osb->node_map_lock);
Modified: branches/dlm-reco-mig/fs/ocfs2/heartbeat.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/heartbeat.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/heartbeat.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -33,7 +33,7 @@
/* node map functions - used to keep track of mounted and in-recovery
* nodes. */
-void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
+void ocfs_node_map_init(ocfs_node_map *map);
int ocfs_node_map_is_empty(ocfs_super *osb,
ocfs_node_map *map);
void ocfs_node_map_set_bit(ocfs_super *osb,
Modified: branches/dlm-reco-mig/fs/ocfs2/journal.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/journal.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/journal.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/kthread.h>
#include "ocfs_log.h"
#include "ocfs.h"
@@ -60,7 +61,6 @@
static int __ocfs_recovery_thread(void *arg);
static int ocfs_commit_cache (ocfs_super * osb);
static int ocfs_wait_on_mount(ocfs_super *osb);
-static int ocfs2_commit_thread_sleep(ocfs_super *osb);
static void ocfs_handle_cleanup_locks(ocfs_journal *journal,
ocfs_journal_handle *handle,
int set_id);
@@ -502,11 +502,11 @@
switch (type) {
case OCFS_JOURNAL_ACCESS_CREATE:
case OCFS_JOURNAL_ACCESS_WRITE:
- status = journal_get_write_access(handle->k_handle, bh, NULL);
+ status = kapi_journal_get_write_access(handle->k_handle, bh);
break;
case OCFS_JOURNAL_ACCESS_UNDO:
- status = journal_get_undo_access(handle->k_handle, bh, NULL);
+ status = kapi_journal_get_undo_access(handle->k_handle, bh);
break;
default:
@@ -778,14 +778,11 @@
/* The OCFS_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
* drop the trans_lock (which we want to hold until we
* completely destroy the journal. */
- if (osb->commit && osb->commit->c_task) {
+ if (osb->commit_task) {
/* Wait for the commit thread */
LOG_TRACE_STR ("Waiting for ocfs2commit to exit....");
- atomic_set (&osb->needs_checkpoint, 1);
- wake_up (&osb->checkpoint_event);
- wait_for_completion(&osb->commit->c_complete);
- osb->commit->c_task = NULL;
- kfree(osb->commit);
+ kthread_stop(osb->commit_task);
+ osb->commit_task = NULL;
}
OCFS_ASSERT(atomic_read(&(osb->journal->num_trans)) == 0);
@@ -818,7 +815,6 @@
{
int status = 0;
int olderr = 0;
- int child_pid;
ocfs_super *osb;
LOG_ENTRY();
@@ -848,22 +844,15 @@
}
/* Launch the commit thread */
- osb->commit = kmalloc(sizeof(ocfs_commit_task), GFP_KERNEL);
- if (osb->commit == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
+ osb->commit_task = kthread_run(ocfs_commit_thread, osb, "ocfs2cmt-%d",
+ osb->osb_id);
+ if (IS_ERR(osb->commit_task)) {
+ status = PTR_ERR(osb->commit_task);
+ osb->commit_task = NULL;
+ LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, "
+ "error=%d", status);
goto done;
- }
- memset(osb->commit, 0, sizeof(ocfs_commit_task));
- child_pid = kernel_thread (ocfs_commit_thread, osb,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (child_pid < 0) {
- LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
- child_pid);
- status = child_pid;
- goto done;
- } else {
- init_completion (&osb->commit->c_complete);
- }
+ }
done:
LOG_EXIT_STATUS(status);
@@ -969,13 +958,9 @@
ocfs_super *osb = arg;
int status = 0;
int node_num;
- char proc[16];
LOG_ENTRY();
- sprintf (proc, "ocfs2rec-%d", osb->osb_id);
- ocfs_daemonize (proc, strlen(proc), 0);
-
status = ocfs_wait_on_mount(osb);
if (status < 0) {
if (status == -EBUSY)
@@ -1020,12 +1005,17 @@
goto restart;
}
- osb->recovery_launched = 0;
+ osb->recovery_thread_task = NULL;
+ mb(); /* sync with ocfs2_recovery_thread_running */
wake_up(&osb->recovery_event);
up(&osb->recovery_lock);
LOG_EXIT_STATUS(status);
+ /* no one is calling kthread_stop() for us so the kthread() api
+ * requires that we call do_exit(). And it isn't exported, but
+ * complete_and_exit() seems to be a minimal wrapper around it. */
+ complete_and_exit(NULL, status);
return status;
}
@@ -1035,19 +1025,25 @@
node_num, osb->node_num);
down(&osb->recovery_lock);
- if (!osb->disable_recovery) {
- /* People waiting on recovery will wait on
- * the recovery map to empty. */
- ocfs_recovery_map_set(osb, node_num);
+ if (osb->disable_recovery)
+ goto out;
+ /* People waiting on recovery will wait on
+ * the recovery map to empty. */
+ ocfs_recovery_map_set(osb, node_num);
- LOG_TRACE_STR("starting recovery thread...");
+ LOG_TRACE_STR("starting recovery thread...");
- if (!osb->recovery_launched) {
- kernel_thread(__ocfs_recovery_thread, osb,
- CLONE_VM | CLONE_FS | CLONE_FILES);
- osb->recovery_launched = 1;
- }
+ if (osb->recovery_thread_task)
+ goto out;
+
+ osb->recovery_thread_task = kthread_run(__ocfs_recovery_thread, osb,
+ "ocfs2rec-%d", osb->osb_id);
+ if (IS_ERR(osb->recovery_thread_task)) {
+ LOG_ERROR_STATUS((int)PTR_ERR(osb->recovery_thread_task));
+ osb->recovery_thread_task = NULL;
}
+
+out:
up(&osb->recovery_lock);
wake_up(&osb->recovery_event);
@@ -1455,104 +1451,42 @@
goto retry;
}
-static int ocfs2_commit_thread_sleep(ocfs_super *osb)
-{
- int status;
- signed long timeout = OCFS_CHECKPOINT_INTERVAL;
- DECLARE_WAITQUEUE(wait, current);
-
- if (atomic_read(&osb->needs_checkpoint))
- return 0;
-
- status = 0;
- add_wait_queue(&osb->checkpoint_event, &wait);
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (atomic_read(&osb->needs_checkpoint))
- break;
-
- if (!signal_pending(current)) {
- timeout = schedule_timeout(timeout);
- if (!timeout) {
- status = -ETIMEDOUT;
- break;
- }
- continue;
- }
- status = -EINTR;
- break;
- }
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&osb->checkpoint_event, &wait);
-
- return status;
-}
-
int ocfs_commit_thread(void *arg)
{
- int status = 0, misses = 0;
+ int status;
ocfs_super *osb = arg;
- ocfs_commit_task *commit = osb->commit;
- char name[16];
ocfs_journal *journal = osb->journal;
- siginfo_t info;
- sprintf (name, "ocfs2cmt-%d", osb->osb_id);
- ocfs_daemonize (name, strlen(name), 0);
+ /* we can trust num_trans here because _should_stop() is only set in
+ * shutdown and nobody other than ourselves should be able to start
+ * transactions. committing on shutdown might take a few iterations
+ * as final transactions put deleted inodes on the list */
+ while (!(kthread_should_stop() &&
+ atomic_read(&journal->num_trans) == 0)) {
- commit->c_task = current;
+ status = wait_event_timeout(osb->checkpoint_event,
+ atomic_read(&journal->num_trans) ||
+ kthread_should_stop(),
+ OCFS_CHECKPOINT_INTERVAL);
+ if (status == 0)
+ LOG_TRACE_STR("timed out");
+ else if (status == -ERESTARTSYS)
+ LOG_TRACE_STR("signal pending\n");
+ else
+ LOG_TRACE_STR("woken");
- misses = 0;
- while (1) {
- status = ocfs2_commit_thread_sleep(osb);
- atomic_set (&osb->needs_checkpoint, 0);
-
- switch (status) {
- case -ETIMEDOUT:
- LOG_TRACE_STR("timed out");
- break;
- case -EINTR:
- LOG_ERROR_STR("Commit thread got a signal!");
- /* ignore the actual signal */
- if (signal_pending(current)) {
- dequeue_signal_lock(current,
- &current->blocked,
- &info);
- }
- break;
- case 0:
- LOG_TRACE_STR("woken\n");
- break;
- default:
- LOG_ERROR_STR("invalid status!\n");
- break;
- }
-
-skip_sleep:
status = ocfs_commit_cache(osb);
if (status < 0)
LOG_ERROR_STATUS(status);
- /* journal shutdown has asked me to sync up and then
- * exit. We might still have transactions pending as
- * our last commit might've put deleted inodes on the
- * list so we loop back around. */
- if (journal->state == OCFS_JOURNAL_IN_SHUTDOWN) {
- /* we can trust num_trans here because we're
- * in shutdown and nobody other than ourselves
- * should be able to start more. */
- if (atomic_read(&journal->num_trans) == 0)
- break;
#ifdef VERBOSE_COMMIT_THREAD
+ if (kthread_should_stop() && atomic_read(&journal->num_trans)){
printk("(%u) commit_thread: %u transactions pending "
- "on shutdown\n",
- current->pid, atomic_read(&journal->num_trans));
-#endif
- goto skip_sleep;
+ "on shutdown\n", current->pid,
+ atomic_read(&journal->num_trans));
}
+#endif
}
- complete (&(commit->c_complete));
return 0;
}
Modified: branches/dlm-reco-mig/fs/ocfs2/mmap.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/mmap.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/mmap.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -49,7 +49,8 @@
static inline struct rb_node * __ocfs2_buffer_lock_ctxt_root(
ocfs2_buffer_lock_ctxt *ctxt);
static int ocfs2_buffer_lock_ctxt_insert(ocfs2_buffer_lock_ctxt *ctxt,
- struct inode *inode);
+ struct inode *inode,
+ ocfs2_backing_inode **binode_ret);
static int ocfs2_fill_ctxt_from_buf(struct super_block *sb,
struct inode *target_inode,
char *buf,
@@ -163,7 +164,8 @@
}
static int ocfs2_buffer_lock_ctxt_insert(ocfs2_buffer_lock_ctxt *ctxt,
- struct inode *inode)
+ struct inode *inode,
+ ocfs2_backing_inode **binode_ret)
{
u64 blkno;
ocfs2_backing_inode *tmp, *binode;
@@ -194,14 +196,15 @@
return 0; /* Don't insert duplicates */
}
- binode = kmalloc(sizeof(ocfs2_backing_inode), GFP_KERNEL);
+ binode = kcalloc(1, sizeof(ocfs2_backing_inode), GFP_KERNEL);
if (!binode)
return -ENOMEM;
- memset(binode, 0, sizeof(ocfs2_backing_inode));
binode->ba_inode = inode;
- binode->ba_locked = 0;
ocfs2_init_io_marker(&binode->ba_task);
+ if (binode_ret)
+ *binode_ret = binode;
+
rb_link_node(&binode->ba_node, parent, p);
rb_insert_color(&binode->ba_node, &ctxt->b_inodes);
@@ -231,7 +234,8 @@
if (inode->i_sb == sb &&
inode != target_inode) {
status = ocfs2_buffer_lock_ctxt_insert(ctxt,
- inode);
+ inode,
+ NULL);
if (status < 0)
goto bail;
}
@@ -246,12 +250,12 @@
struct inode *target_inode,
char *buf,
size_t size,
- ocfs2_buffer_lock_ctxt *ctxt)
+ ocfs2_buffer_lock_ctxt *ctxt,
+ ocfs2_backing_inode **target_binode)
{
int skip_sem = current->flags & PF_DUMPCORE;
int status;
struct mm_struct *mm = current->mm;
- struct rb_node *first;
OCFS_ASSERT(mm);
@@ -260,15 +264,12 @@
OCFS_ASSERT(!__ocfs2_buffer_lock_ctxt_root(ctxt));
- /* We always insert target because it might not be backing
- part of the buffer - but it needs to be in there so that
- it's lock gets ordered with everything else */
- status = ocfs2_buffer_lock_ctxt_insert(ctxt, target_inode);
+ /* We always insert target because it might not be backing part of the
+ * buffer - but it needs to be in there so that it's lock gets ordered
+ * with everything else */
+ status = ocfs2_buffer_lock_ctxt_insert(ctxt, target_inode,
+ target_binode);
if (!status) {
- /* The assert above guarantees that this will work. */
- ctxt->b_target = rb_entry(__ocfs2_buffer_lock_ctxt_root(ctxt),
- ocfs2_backing_inode, ba_node);
-
/* Now fill the tree with any inodes that back this
* buffer. If target inode is in there, it will be
* skipped over. */
@@ -285,14 +286,46 @@
goto bail;
}
- first = rb_first(&ctxt->b_inodes);
- ctxt->b_head = rb_entry(first, ocfs2_backing_inode, ba_node);
-
status = 0;
bail:
return status;
}
+/* starting from pos, which can be null for the first call, give the
+ * next buffer that needs unlocking. we return null when there are none
+ * left or we see last_inode */
+static ocfs2_backing_inode *ocfs2_next_unlocked(ocfs2_buffer_lock_ctxt *ctxt,
+ struct inode *last_inode,
+ ocfs2_backing_inode *pos)
+{
+ ocfs2_backing_inode *binode = NULL;
+ struct rb_node *node = NULL;
+
+ if (pos == NULL) {
+ if (ctxt->b_next_unlocked)
+ binode = ctxt->b_next_unlocked;
+ else
+ node = rb_first(&ctxt->b_inodes);
+ } else
+ node = rb_next(&pos->ba_node);
+
+ if (node)
+ binode = rb_entry(node, ocfs2_backing_inode, ba_node);
+
+ if (binode && last_inode && binode->ba_inode == last_inode)
+ binode = NULL;
+
+ /* this is just an optimization to skip nodes in the tree
+ * that we've already seen. If we're moving from one we've locked
+ * to one we haven't then we mark this node in the ctxt so that
+ * we'll return to it in a future call after, say, hitting last_inode
+ * or EIOCBQUEUED in lock_buffer_inodes */
+ if (pos && pos->ba_locked && binode)
+ ctxt->b_next_unlocked = binode;
+
+ return binode;
+}
+
/* Will take locks on all inodes in the ctxt up until 'last_inode'. If
* last_inode is NULL, then we take locks on everything. We mark lock
* status on the context so we skip any that have already been
@@ -303,43 +336,40 @@
int ocfs2_lock_buffer_inodes(ocfs2_buffer_lock_ctxt *ctxt,
struct inode *last_inode)
{
- int status, meta_level, data_level;
- ocfs2_backing_inode *binode;
+ int status, data_level;
+ ocfs2_backing_inode *binode = NULL;
struct inode *inode;
- struct rb_node *node;
- binode = ctxt->b_head;
-
- while(binode) {
+ while((binode = ocfs2_next_unlocked(ctxt, last_inode, binode))) {
+ /* the tricksy caller might have locked inodes themselves
+ * between calls. */
+ if (binode->ba_locked)
+ continue;
inode = binode->ba_inode;
- if (inode == last_inode)
- break;
- if (binode->ba_locked)
- goto skip_locking;
-
- meta_level = 0;
- if (ocfs2_buffer_lock_is_target(ctxt, inode))
- meta_level = ctxt->b_lock_meta_write;
-
- status = ocfs2_meta_lock(inode, NULL, NULL, meta_level);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS(status);
- goto bail;
+ if (!binode->ba_meta_locked) {
+ status = ocfs2_meta_lock_flags_async(inode, NULL,
+ binode->ba_lock_meta_level,
+ 0,
+ ctxt->b_cb,
+ ctxt->b_cb_data);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ binode->ba_meta_locked = 1;
}
- /* If we're doing direct IO, then skip data locking on
- * the target. */
- if (!ocfs2_buffer_lock_is_target(ctxt, inode) ||
- !ctxt->b_lock_direct) {
- data_level = 0;
- if (ocfs2_buffer_lock_is_target(ctxt, inode))
- data_level = ctxt->b_lock_data_write;
-
+ /* ba_lock_data isn't set for direct io */
+ if (binode->ba_lock_data) {
+ data_level = binode->ba_lock_data_level;
status = ocfs2_data_lock(inode, data_level);
if (status < 0) {
- ocfs2_meta_unlock(inode, meta_level);
+ if (status == -EIOCBQUEUED)
+ goto bail;
+ ocfs2_meta_unlock(inode,
+ binode->ba_lock_meta_level);
if (status != -EINTR)
LOG_ERROR_STATUS(status);
@@ -347,17 +377,9 @@
}
}
ocfs2_add_io_marker(inode, &binode->ba_task);
-
binode->ba_locked = 1;
-skip_locking:
- node = rb_next(&binode->ba_node);
- binode = NULL;
- if (node)
- binode = rb_entry(node, ocfs2_backing_inode, ba_node);
}
- ctxt->b_head = binode;
-
status = 0;
bail:
return status;
@@ -365,44 +387,233 @@
void ocfs2_unlock_buffer_inodes(ocfs2_buffer_lock_ctxt *ctxt)
{
- int level;
ocfs2_backing_inode *binode;
- struct inode *inode;
- struct rb_node *node, *tmp;
+ struct rb_node *node;
- node = rb_first(&ctxt->b_inodes);
- while(node) {
+ /* dlm locks don't mask ints.. this should be lower down */
+ BUG_ON(in_interrupt());
+
+ /* unlock in reverse order to minimize waking forward lockers */
+ while ((node = rb_last(&ctxt->b_inodes)) != NULL) {
binode = rb_entry(node, ocfs2_backing_inode, ba_node);
- if (!binode->ba_locked)
- goto skip_unlock;
- inode = binode->ba_inode;
- ocfs2_del_io_marker(inode, &binode->ba_task);
+ ocfs2_del_io_marker(binode->ba_inode, &binode->ba_task);
- if (!ocfs2_buffer_lock_is_target(ctxt, inode) ||
- !ctxt->b_lock_direct) {
- level = 0;
- if (ocfs2_buffer_lock_is_target(ctxt, inode))
- level = ctxt->b_lock_data_write;
+ if (binode->ba_locked && binode->ba_lock_data)
+ ocfs2_data_unlock(binode->ba_inode,
+ binode->ba_lock_data_level);
- ocfs2_data_unlock(inode, level);
+ if (binode->ba_locked || binode->ba_meta_locked)
+ ocfs2_meta_unlock(binode->ba_inode,
+ binode->ba_lock_meta_level);
+
+ rb_erase(node, &ctxt->b_inodes);
+ kfree(binode);
+ }
+
+ ctxt->b_next_unlocked = NULL;
+}
+
+/*
+ * This builds up the locking state that will be used by a write. both normal
+ * file writes and AIO writes come in through here. This function does no
+ * teardown on its own. The caller must examine the info struct to see if it
+ * needs to release locks or i_sem, etc. This function is also restartable in
+ * that it can return EIOCBQUEUED if it would have blocked in the dlm. It
+ * stores its partial progress in the info struct so the caller can call back
+ * in when it thinks the dlm won't block any more. Thus, the caller must zero
+ * the info struct before calling in the first time.
+ */
+ssize_t ocfs_write_lock_maybe_extend(struct file *filp, const char *buf,
+ size_t count, loff_t *ppos,
+ struct ocfs2_write_lock_info *info,
+ ocfs2_buffer_lock_ctxt *ctxt)
+{
+ int ret = 0;
+ ocfs_super *osb = NULL;
+ struct dentry *dentry = filp->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ int status;
+ int sector_size;
+ int level = filp->f_flags & O_APPEND;
+ loff_t saved_ppos;
+ u64 bytes_added = 0;
+
+ osb = OCFS_SB(inode->i_sb);
+ sector_size = 1 << osb->s_sectsize_bits;
+
+ /* the target inode is different from the other inodes. in o_direct it
+ * doesn't get a data lock and when appending it gets a level 1 meta
+ * lock. we use target_binode to set its flags accordingly */
+ if (info->wl_target_binode == NULL) {
+ ret = ocfs2_setup_io_locks(inode->i_sb, inode, (char *) buf,
+ count, ctxt,
+ &info->wl_target_binode);
+ if (ret < 0) {
+ BUG_ON(ret == -EIOCBQUEUED);
+ LOG_ERROR_STATUS(ret);
+ goto bail;
}
+ }
- level = 0;
- if (ocfs2_buffer_lock_is_target(ctxt, inode))
- level = ctxt->b_lock_meta_write;
+ /* This will lock everyone in the context who's order puts
+ * them before us. */
+ if (!info->wl_have_before) {
+ info->wl_unlock_ctxt = 1;
+ ret = ocfs2_lock_buffer_inodes(ctxt, inode);
+ if (ret < 0) {
+ if (ret != -EINTR)
+ LOG_ERROR_STATUS(ret);
+ goto bail;
+ }
+ info->wl_have_before = 1;
+ /* we're writing so get an ex data cluster lock */
+ info->wl_target_binode->ba_lock_data_level = 1;
+ }
- ocfs2_meta_unlock(inode, level);
+ if (!info->wl_have_i_sem) {
+ down(&inode->i_sem);
+ info->wl_have_i_sem = 1;
+ }
-skip_unlock:
- tmp = node;
- node = rb_next(node);
+lock:
+ if (!info->wl_have_target_meta) {
+ status = ocfs2_meta_lock(inode, NULL, NULL, level);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS(status);
+ ret = status;
+ goto bail;
+ }
+ info->wl_have_target_meta = 1;
+ }
+ /* to handle extending writes, we do a bit of our own locking
+ * here, but we setup the ctxt do unlock for us (as well as
+ * handle locking everything else. */
+ if (level)
+ info->wl_target_binode->ba_lock_meta_level = 1;
- rb_erase(tmp, &ctxt->b_inodes);
- kfree(binode);
+ /* work on a copy of ppos until we're sure that we won't have
+ * to recalculate it due to relocking. */
+ saved_ppos = *ppos;
+
+ if (filp->f_flags & O_APPEND) {
+ saved_ppos = i_size_read(inode);
+ LOG_TRACE_ARGS("O_APPEND: inode->i_size=%llu\n", saved_ppos);
+
+ /* ugh, work around some applications which open
+ * everything O_DIRECT + O_APPEND and really don't
+ * mean to use O_DIRECT. */
+#warning this is wrong wrong wrong
+ filp->f_flags &= ~O_DIRECT;
}
- ctxt->b_target = ctxt->b_head = NULL;
+ if (filp->f_flags & O_DIRECT) {
+ /* anything special for o_direct? */
+ LOG_TRACE_STR ("O_DIRECT");
+ if ((saved_ppos & (sector_size - 1)) ||
+ (count & (sector_size - 1)) ||
+ ((unsigned long)buf & (sector_size - 1))) {
+ info->wl_do_direct_io = 0;
+ filp->f_flags |= O_SYNC;
+ } else {
+ info->wl_do_direct_io = 1;
+ }
+ }
+ info->wl_target_binode->ba_lock_data = info->wl_do_direct_io ? 0 : 1;
+
+ info->wl_newsize = count + saved_ppos;
+ if (filp->f_flags & O_APPEND)
+ info->wl_newsize = count + i_size_read(inode);
+
+ LOG_TRACE_ARGS ("ppos=%llu newsize=%llu cursize=%llu\n",
+ saved_ppos, info->wl_newsize, i_size_read(inode));
+
+ if (info->wl_newsize > i_size_read(inode)) {
+ if (!level) {
+ /* we want an extend, but need a higher
+ * level cluster lock. */
+ LOG_TRACE_ARGS("inode %llu, had a PR, looping back "
+ "for EX\n", OCFS_I(inode)->ip_blkno);
+ ocfs2_meta_unlock(inode, level);
+ info->wl_have_target_meta = 0;
+ level = 1;
+ goto lock;
+ }
+ info->wl_extended = 1;
+
+ LOG_TRACE_ARGS("Writing at EOF, will need more allocation: "
+ "i_size=%llu, need=%llu\n",
+ i_size_read(inode), info->wl_newsize);
+
+ /* If we extend AT ALL here then we update our state
+ * and continue the write call, regardless of error --
+ * this is basically a short write. */
+ status = ocfs_extend_file(osb, inode, info->wl_newsize,
+ &bytes_added);
+ if (status < 0 && (!bytes_added)) {
+ if (status != -EINTR && status != -ENOSPC) {
+ LOG_ERROR_STATUS (status);
+ LOG_ERROR_ARGS("Failed to extend inode %llu "
+ "from %llu to %llu",
+ OCFS_I(inode)->ip_blkno,
+ *ppos, info->wl_newsize);
+ }
+ ret = status;
+
+ info->wl_have_target_meta = 0;
+ ocfs2_meta_unlock(inode, level);
+ goto bail;
+ }
+
+ /* We need to recalculate newsize and count according
+ * to what extend could give us. If we got the whole
+ * extend then this doesn't wind up changing the
+ * values. */
+ info->wl_newsize = i_size_read(inode) + bytes_added;
+ count = info->wl_newsize - saved_ppos;
+
+ if (status < 0 && status != -ENOSPC && status != -EINTR)
+ LOG_ERROR_ARGS("status return of %d extending inode "
+ "%llu\n", status,
+ OCFS_I(inode)->ip_blkno);
+ status = 0;
+ }
+
+ /* we've got whatever cluster lock is appropriate now, so we
+ * can stuff *ppos back. */
+ *ppos = saved_ppos;
+
+ if (!info->wl_do_direct_io && !info->wl_have_data_lock) {
+ status = ocfs2_data_lock(inode, 1);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS(status);
+ ret = status;
+
+ info->wl_have_target_meta = 0;
+ ocfs2_meta_unlock(inode, level);
+ goto bail;
+ }
+ info->wl_have_data_lock = 1;
+ }
+
+ /* Alright, fool the io locking stuff into thinking it's
+ * handled our inode for us. We can now count on it to do the
+ * unlock for us. */
+ info->wl_target_binode->ba_locked = 1;
+
+ /* This will lock everyone who's order puts them *after* our inode. */
+ ret = ocfs2_lock_buffer_inodes(ctxt, NULL);
+ if (ret < 0) {
+ if (ret != -EINTR)
+ LOG_ERROR_STATUS(ret);
+ goto bail;
+ }
+
+bail:
+ LOG_EXIT_INT(ret);
+ return ret;
}
#if 0
Modified: branches/dlm-reco-mig/fs/ocfs2/mmap.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/mmap.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/mmap.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -42,7 +42,8 @@
ocfs_inode_private *oip = OCFS_I(inode);
spin_lock(&oip->ip_lock);
- list_del_init(&task->io_list);
+ if (!list_empty(&task->io_list))
+ list_del_init(&task->io_list);
spin_unlock(&oip->ip_lock);
}
@@ -70,46 +71,61 @@
typedef struct _ocfs2_backing_inode {
struct rb_node ba_node;
struct inode *ba_inode;
- int ba_locked;
+ unsigned ba_meta_locked:1, /* meta is locked */
+ ba_locked:1, /* both are locked */
+ ba_lock_data:1, /* should lock data */
+ ba_lock_meta_level:1,
+ ba_lock_data_level:1;
struct _ocfs2_io_marker ba_task;
} ocfs2_backing_inode;
/* Used to manage the locks taken during I/O. */
typedef struct _ocfs2_buffer_lock_ctxt {
- /* target flags */
- unsigned b_lock_direct:1,
- b_lock_meta_write:1,
- b_lock_data_write:1;
struct rb_root b_inodes;
- ocfs2_backing_inode *b_target;
- ocfs2_backing_inode *b_head;
+ ocfs2_backing_inode *b_next_unlocked;
+ ocfs2_lock_callback b_cb;
+ unsigned long b_cb_data;
} ocfs2_buffer_lock_ctxt;
-#define __BUFFERLOCK_INITIALIZER(name) { \
+#define __BUFFERLOCK_INITIALIZER { \
.b_inodes = RB_ROOT, \
- .b_target = NULL, \
- .b_head = NULL }
+ .b_next_unlocked = NULL, \
+ .b_cb = NULL, \
+ .b_cb_data = 0 }
#define DECLARE_BUFFER_LOCK_CTXT(name) \
- ocfs2_buffer_lock_ctxt name = __BUFFERLOCK_INITIALIZER(name)
+ ocfs2_buffer_lock_ctxt name = __BUFFERLOCK_INITIALIZER
+#define INIT_BUFFER_LOCK_CTXT(ctxt) \
+ *(ctxt) = (ocfs2_buffer_lock_ctxt) __BUFFERLOCK_INITIALIZER
+
int ocfs2_setup_io_locks(struct super_block *sb,
struct inode *target_inode,
char *buf,
size_t size,
- ocfs2_buffer_lock_ctxt *ctxt);
+ ocfs2_buffer_lock_ctxt *ctxt,
+ ocfs2_backing_inode **target_binode);
int ocfs2_lock_buffer_inodes(ocfs2_buffer_lock_ctxt *ctxt,
struct inode *last_inode);
void ocfs2_unlock_buffer_inodes(struct _ocfs2_buffer_lock_ctxt *ctxt);
-static inline int ocfs2_buffer_lock_is_target(ocfs2_buffer_lock_ctxt *ctxt,
- struct inode *inode)
-{
- if (!ctxt->b_target)
- return 0;
- return inode == ctxt->b_target->ba_inode;
-}
+struct ocfs2_write_lock_info {
+ u64 wl_newsize;
+ unsigned wl_extended:1,
+ wl_do_direct_io:1,
+ wl_have_i_sem:1,
+ wl_unlock_ctxt:1,
+ wl_have_before:1,
+ wl_have_target_meta:1,
+ wl_have_data_lock:1;
+ ocfs2_backing_inode *wl_target_binode;
+};
+ssize_t ocfs_write_lock_maybe_extend(struct file *filp, const char *buf,
+ size_t count, loff_t *ppos,
+ struct ocfs2_write_lock_info *info,
+ ocfs2_buffer_lock_ctxt *ctxt);
+
#endif /* OCFS2_MMAP_H */
Modified: branches/dlm-reco-mig/fs/ocfs2/ocfs.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/ocfs.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/ocfs.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -163,15 +163,30 @@
struct ocfs2_lock_res_ops;
+typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
+
+struct ocfs2_lockres_flag_callback {
+ struct list_head fc_lockres_item;
+ unsigned fc_free_once_called:1;
+
+ unsigned long fc_flag_mask;
+ unsigned long fc_flag_goal;
+
+ ocfs2_lock_callback fc_cb;
+ unsigned long fc_data;
+};
+
+
typedef struct _ocfs2_lock_res {
void *l_priv;
struct ocfs2_lock_res_ops *l_ops;
spinlock_t l_lock;
struct list_head l_blocked_list;
+ struct list_head l_flag_cb_list;
enum ocfs2_lock_type l_type;
- int l_flags;
+ unsigned long l_flags;
char *l_name;
int l_level;
unsigned int l_ro_holders;
@@ -186,6 +201,7 @@
int l_blocking;
wait_queue_head_t l_event;
+
} ocfs2_lock_res;
/* OCFS2 Inode Private Data */
@@ -271,12 +287,6 @@
}
ocfs_vol_state;
-typedef struct _ocfs_commit_task
-{
- struct completion c_complete;
- struct task_struct *c_task;
-} ocfs_commit_task;
-
typedef struct _ocfs_alloc_stats
{
atomic_t moves;
@@ -301,7 +311,7 @@
{
struct list_head osb_next; /* list of ocfs_super(s) */
u32 osb_id; /* id used by the proc interface */
- ocfs_commit_task *commit;
+ struct task_struct *commit_task;
struct super_block *sb;
struct inode *root_inode;
struct inode *sys_root_inode;
@@ -344,7 +354,7 @@
atomic_t vol_state;
struct semaphore recovery_lock;
- int recovery_launched;
+ struct task_struct *recovery_thread_task;
int disable_recovery;
wait_queue_head_t checkpoint_event;
atomic_t needs_checkpoint;
@@ -373,8 +383,8 @@
spinlock_t vote_task_lock;
struct task_struct *vote_task;
wait_queue_head_t vote_event;
- atomic_t wake_vote_task;
- int vote_exit;
+ unsigned long vote_wake_sequence;
+ unsigned long vote_work_sequence;
struct list_head blocked_lock_list;
unsigned long blocked_lock_count;
@@ -382,9 +392,6 @@
struct list_head vote_list;
int vote_count;
- struct completion vote_event_complete;
- struct completion vote_event_init;
-
u32 net_key;
spinlock_t net_response_lock;
unsigned int net_response_ids;
Modified: branches/dlm-reco-mig/fs/ocfs2/proc.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/proc.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/proc.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -432,7 +432,7 @@
osb = data;
if (osb) {
- for (i = 0; i < osb->max_nodes; i++) {
+ for (i = 0; i < OCFS_NODE_MAP_MAX_NODES; i++) {
mount = ocfs_node_map_test_bit(osb, &osb->mounted_map, i) ? 'M' : ' ';
len += sprintf(page + len, "%2d %c\n", i, mount);
}
Modified: branches/dlm-reco-mig/fs/ocfs2/suballoc.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/suballoc.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/suballoc.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -321,7 +321,8 @@
&bit_off,
&num_bits);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -ENOSPC)
+ LOG_ERROR_STATUS(status);
goto bail;
}
@@ -456,7 +457,8 @@
status = ocfs_block_group_alloc(osb, alloc_inode, bh);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -ENOSPC)
+ LOG_ERROR_STATUS(status);
goto bail;
}
/* You should never ask for this much metadata */
@@ -558,7 +560,8 @@
status = ocfs_reserve_suballoc_bits(osb, *ac);
if (status < 0) {
- LOG_ERROR_STATUS(status);
+ if (status != -ENOSPC)
+ LOG_ERROR_STATUS(status);
goto bail;
}
Modified: branches/dlm-reco-mig/fs/ocfs2/super.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/super.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/super.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -1010,6 +1010,15 @@
} /* ocfs_mount_volume */
+/* we can't grab the goofy sem lock from inside wait_event, so we use
+ * memory barriers to make sure that we'll see the null task before
+ * being woken up */
+static int ocfs2_recovery_thread_running(ocfs_super *osb)
+{
+ mb();
+ return osb->recovery_thread_task != NULL;
+}
+
/*
* ocfs_dismount_volume()
*
@@ -1031,13 +1040,8 @@
* running ones to exit. Do this before setting the vol_state. */
down(&osb->recovery_lock);
osb->disable_recovery = 1;
- while (osb->recovery_launched) {
- up(&osb->recovery_lock);
- LOG_TRACE_STR("Waiting on a recovery thread to complete.");
- schedule();
- down(&osb->recovery_lock);
- }
up(&osb->recovery_lock);
+ wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
ocfs_journal_shutdown(osb);
@@ -1135,9 +1139,10 @@
}
init_waitqueue_head(&osb->recovery_event);
- atomic_set(&osb->wake_vote_task, 0);
spin_lock_init(&osb->vote_task_lock);
init_waitqueue_head(&osb->vote_event);
+ osb->vote_work_sequence = 0;
+ osb->vote_wake_sequence = 0;
INIT_LIST_HEAD(&osb->blocked_lock_list);
osb->blocked_lock_count = 0;
INIT_LIST_HEAD(&osb->vote_list);
@@ -1171,7 +1176,7 @@
init_MUTEX (&(osb->recovery_lock));
osb->disable_recovery = 0;
- osb->recovery_launched = 0;
+ osb->recovery_thread_task = NULL;
init_waitqueue_head (&osb->checkpoint_event);
atomic_set (&osb->needs_checkpoint, 0);
Modified: branches/dlm-reco-mig/fs/ocfs2/util.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/util.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/util.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -69,45 +69,6 @@
#endif
}
-/*
- * ocfs_daemonize()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* yes, len is unused but kept here for backwards compatibility. */
-void ocfs_daemonize (char *name, int len, int shutdown_sigs)
-{
- sigset_t tmpsig;
-
- daemonize (name);
-
- if (shutdown_sigs) {
- /* Unblock SIGKILL, SIGSTOP, SIGHUP and SIGINT */
- sigemptyset(&tmpsig);
- sigaddsetmask(&tmpsig, SHUTDOWN_SIGS);
- sigprocmask(SIG_UNBLOCK, &tmpsig, NULL);
- }
-} /* ocfs_daemonize */
-#else
-void ocfs_daemonize (char *name, int len, int shutdown_sigs)
-{
- daemonize ();
- reparent_to_init ();
-
- if (len > 0) {
- if (len > 15)
- BUG();
- strncpy (current->comm, name, len);
- current->comm[len] = '\0';
- }
-
- if (shutdown_sigs)
- ocfs_block_sigs(NULL, SHUTDOWN_SIGS);
- else
- ocfs_block_sigs(NULL, 0);
-} /* ocfs_daemonize */
-#endif
-
/* prefetch has been declared to allow to build in debug mode */
#ifdef DEBUG
#ifndef ARCH_HAS_PREFETCH
Modified: branches/dlm-reco-mig/fs/ocfs2/util.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/util.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/util.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -26,7 +26,6 @@
#ifndef OCFS2_UTIL_H
#define OCFS2_UTIL_H
-void ocfs_daemonize(char *name, int len, int shutdown_sigs);
void ocfs_show_stack(unsigned long *esp);
void ocfs_show_trace(unsigned long *stack);
void ocfs_block_sigs(sigset_t *oldsigs, unsigned long mask);
Modified: branches/dlm-reco-mig/fs/ocfs2/vote.c
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/vote.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/vote.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>
+#include <linux/kthread.h>
#include <cluster/util.h>
#include <cluster/clcommon.h>
@@ -327,6 +328,10 @@
LOG_ENTRY();
spin_lock(&osb->vote_task_lock);
+ /* grab this early so we know to try again if a state change and
+ * wake happens part-way through our work */
+ osb->vote_work_sequence = osb->vote_wake_sequence;
+
processed = osb->blocked_lock_count;
while (processed) {
OCFS_ASSERT(!list_empty(&osb->blocked_lock_list));
@@ -363,60 +368,56 @@
LOG_EXIT();
}
-static inline int ocfs2_vote_thread_has_work(ocfs_super *osb)
+
+static int ocfs2_vote_thread_lists_empty(ocfs_super *osb)
{
- if (list_empty(&osb->blocked_lock_list) &&
- list_empty(&osb->vote_list))
- return 0;
+ int empty = 0;
- return 1;
+ spin_lock(&osb->vote_task_lock);
+ if (list_empty(&osb->blocked_lock_list) &&
+ list_empty(&osb->vote_list))
+ empty = 1;
+
+ spin_unlock(&osb->vote_task_lock);
+ return empty;
}
+static int ocfs2_vote_thread_should_wake(ocfs_super *osb)
+{
+ int should_wake = 0;
+
+ spin_lock(&osb->vote_task_lock);
+ if (osb->vote_work_sequence != osb->vote_wake_sequence)
+ should_wake = 1;
+ spin_unlock(&osb->vote_task_lock);
+
+ return should_wake;
+}
+
int ocfs2_vote_thread(void *arg)
{
int status = 0;
ocfs_super *osb = arg;
- char proc[16];
- sprintf (proc, "ocfs2vote-%d", osb->osb_id);
- ocfs_daemonize (proc, strlen(proc), 0);
+ /* only quit once we've been asked to stop and there is no more
+ * work available */
+ while (!(kthread_should_stop() &&
+ ocfs2_vote_thread_lists_empty(osb))) {
- spin_lock(&osb->vote_task_lock);
- osb->vote_task = current;
- init_completion (&osb->vote_event_complete);
-
- complete(&osb->vote_event_init);
-
- while (1) {
- if (osb->vote_exit) {
- if (!ocfs2_vote_thread_has_work(osb))
- break;
- /* don't want to sleep if we're supposed to quit. */
- atomic_set(&osb->wake_vote_task, 1);
- }
- spin_unlock(&osb->vote_task_lock);
-
wait_event_interruptible(osb->vote_event,
- atomic_read(&osb->wake_vote_task));
+ ocfs2_vote_thread_should_wake(osb) ||
+ kthread_should_stop());
- atomic_set(&osb->wake_vote_task, 0);
-
LOG_TRACE_STR("vote_thread: awoken");
ocfs2_vote_thread_do_work(osb);
- spin_lock(&osb->vote_task_lock);
}
osb->vote_task = NULL;
- spin_unlock(&osb->vote_task_lock);
-
- complete(&osb->vote_event_complete);
-
return status;
}
-static ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(ocfs_super *osb,
- unsigned int response_id)
+static ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id)
{
ocfs2_net_wait_ctxt *w;
@@ -429,7 +430,7 @@
INIT_LIST_HEAD(&w->n_list);
init_waitqueue_head(&w->n_event);
- ocfs_node_map_init(osb, &w->n_node_map);
+ ocfs_node_map_init(&w->n_node_map);
w->n_response_id = response_id;
bail:
return w;
@@ -509,7 +510,7 @@
LOG_ENTRY();
- w = ocfs2_new_net_wait_ctxt(osb, response_id);
+ w = ocfs2_new_net_wait_ctxt(response_id);
if (!w) {
status = -ENOMEM;
LOG_ERROR_STATUS(status);
Modified: branches/dlm-reco-mig/fs/ocfs2/vote.h
===================================================================
--- branches/dlm-reco-mig/fs/ocfs2/vote.h 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/ocfs2/vote.h 2005-03-04 22:21:29 UTC (rev 1939)
@@ -30,7 +30,11 @@
int ocfs2_vote_thread(void *arg);
static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
{
- atomic_set(&osb->wake_vote_task, 1);
+ spin_lock(&osb->vote_task_lock);
+ /* make sure the voting thread gets a swipe at whatever changes
+ * the caller may have made to the voting state */
+ osb->vote_wake_sequence++;
+ spin_unlock(&osb->vote_task_lock);
wake_up(&osb->vote_event);
}
Modified: branches/dlm-reco-mig/fs/usysfs/dir.c
===================================================================
--- branches/dlm-reco-mig/fs/usysfs/dir.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/usysfs/dir.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -673,29 +673,27 @@
if (!dentry)
goto out_release;
+ d_add(dentry, NULL);
+
err = usysfs_create_dir(&k->kobj, dentry);
- if (err)
- goto out_dput;
+ if (!err) {
+ err = populate_dir(&k->kobj);
+ if (!err)
+ dentry = NULL;
+ else {
+ usysfs_remove_dir(&k->kobj);
+ d_delete(dentry);
+ }
+ }
- d_rehash(dentry);
up(&usysfs_sb->s_root->d_inode->i_sem);
- err = populate_dir(&k->kobj);
- if (err) {
- down(&usysfs_sb->s_root->d_inode->i_sem);
- usysfs_remove_dir(&k->kobj);
- up(&usysfs_sb->s_root->d_inode->i_sem);
- d_delete(dentry);
- goto out_dput;
+ if (dentry) {
+ dput(dentry);
+out_release:
+ usysfs_release_fs();
}
- return 0;
-
-out_dput:
- dput(dentry);
-out_release:
- usysfs_release_fs();
-
return err;
}
Modified: branches/dlm-reco-mig/fs/usysfs/symlink.c
===================================================================
--- branches/dlm-reco-mig/fs/usysfs/symlink.c 2005-03-04 20:26:31 UTC (rev 1938)
+++ branches/dlm-reco-mig/fs/usysfs/symlink.c 2005-03-04 22:21:29 UTC (rev 1939)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* symlink.c - operations for usysfs symlinks.
*/
@@ -95,6 +97,53 @@
}
+int usysfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+ int ret;
+ struct nameidata nd;
+ struct kobject *parent_kobj;
+ struct kobject *target_kobj;
+ struct ukobj_type *uktype;
+
+ if (dentry->d_parent == usysfs_sb->s_root)
+ return -EPERM;
+
+ parent_kobj = usysfs_get_kobject(dentry->d_parent);
+ uktype = to_uktype(parent_kobj->ktype);
+
+ if (!uktype || !uktype->allow_link) {
+ kobject_put(parent_kobj);
+ return -EPERM; /* What lack-of-symlink returns */
+ }
+
+ ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+ if (ret) {
+ kobject_put(parent_kobj);
+ return ret;
+ }
+
+ ret = -ENOENT;
+ target_kobj = usysfs_get_kobject(nd.dentry);
+ if (!target_kobj)
+ goto out_release;
+
+ ret = uktype->allow_link(parent_kobj, target_kobj);
+ if (ret)
+ goto out_target;
+
+ return 0;
+
+out_target:
+ kobject_put(target_kobj);
+
+out_release:
+ path_release(&nd);
+
+ kobject_put(parent_kobj);
+
+ return ret;
+}
+
/**
* usysfs_remove_link - remove symlink in object's directory.
* @kobj: object we're acting for.
Copied: branches/dlm-reco-mig/kapi-compat/include/journal_access.h (from rev 1938, trunk/kapi-compat/include/journal_access.h)
More information about the Ocfs2-commits
mailing list