diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/direct.c 16-read-eof/fs/nfs/direct.c --- 00-stock/fs/nfs/direct.c 2004-02-17 22:59:23.000000000 -0500 +++ 16-read-eof/fs/nfs/direct.c 2004-02-20 17:17:12.000000000 -0500 @@ -429,9 +429,7 @@ nfs_direct_IO(int rw, struct kiocb *iocb if (!is_sync_kiocb(iocb)) goto out; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result < 0) - goto out; + up(&inode->i_sem); switch (rw) { case READ: @@ -452,6 +450,8 @@ nfs_direct_IO(int rw, struct kiocb *iocb break; } + down(&inode->i_sem); + out: dprintk("NFS: direct_IO result=%d\n", result); return result; diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/inode.c 16-read-eof/fs/nfs/inode.c --- 00-stock/fs/nfs/inode.c 2004-02-17 22:59:06.000000000 -0500 +++ 16-read-eof/fs/nfs/inode.c 2004-02-20 17:18:35.000000000 -0500 @@ -47,11 +47,8 @@ * their needs. People that do NFS over a slow network, might for * instance want to reduce it to something closer to 1 for improved * interactive response. - * - * For the moment, though, we instead set it to RPC_MAXREQS, which - * is the maximum number of simultaneous RPC requests on the wire. */ -#define NFS_MAX_READAHEAD RPC_MAXREQS +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/nfs3proc.c 16-read-eof/fs/nfs/nfs3proc.c --- 00-stock/fs/nfs/nfs3proc.c 2004-02-17 22:57:56.000000000 -0500 +++ 16-read-eof/fs/nfs/nfs3proc.c 2004-02-20 17:21:09.000000000 -0500 @@ -739,11 +739,10 @@ nfs3_read_done(struct rpc_task *task) } static void -nfs3_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs3_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], @@ -751,17 +750,7 @@ nfs3_proc_read_setup(struct nfs_read_dat .rpc_resp = &data->res, .rpc_cred = data->cred, }; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - + /* N.B. Do we need to test? Never called for swapfile inode */ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); @@ -787,11 +776,10 @@ nfs3_write_done(struct rpc_task *task) } static void -nfs3_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs3_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int stable; int flags; struct rpc_message msg = { @@ -808,17 +796,7 @@ nfs3_proc_write_setup(struct nfs_write_d stable = NFS_DATA_SYNC; } else stable = NFS_UNSTABLE; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = stable; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; @@ -845,7 +823,7 @@ nfs3_commit_done(struct rpc_task *task) } static void -nfs3_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; @@ -857,13 +835,6 @@ nfs3_proc_commit_setup(struct nfs_write_ .rpc_cred = data->cred, }; - data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/nfs4proc.c 16-read-eof/fs/nfs/nfs4proc.c --- 00-stock/fs/nfs/nfs4proc.c 2004-02-17 22:58:53.000000000 -0500 +++ 16-read-eof/fs/nfs/nfs4proc.c 2004-02-20 17:21:09.000000000 -0500 @@ -1535,7 +1535,7 @@ nfs4_read_done(struct rpc_task *task) } static void -nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs4_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1548,14 +1548,6 @@ nfs4_proc_read_setup(struct nfs_read_dat struct nfs_page *req = nfs_list_entry(data->pages.next); int flags; - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; data->timestamp = jiffies; data->lockowner = req->wb_lockowner; @@ -1623,7 +1615,7 @@ nfs4_write_done(struct rpc_task *task) } static void -nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs4_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1644,16 +1636,8 @@ nfs4_proc_write_setup(struct nfs_write_d stable = NFS_DATA_SYNC; } else stable = NFS_UNSTABLE; - - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = stable; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; + data->timestamp = jiffies; data->lockowner = req->wb_lockowner; @@ -1690,7 +1674,7 @@ nfs4_commit_done(struct rpc_task *task) } static void -nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1702,13 +1686,6 @@ nfs4_proc_commit_setup(struct nfs_write_ struct inode *inode = data->inode; int flags; - data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/pagelist.c 16-read-eof/fs/nfs/pagelist.c --- 00-stock/fs/nfs/pagelist.c 2004-02-17 22:59:27.000000000 -0500 +++ 16-read-eof/fs/nfs/pagelist.c 2004-02-20 17:26:40.000000000 -0500 @@ -88,6 +88,7 @@ nfs_create_request(struct file *file, st * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ req->wb_page = page; + atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); req->wb_offset = offset; diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/proc.c 16-read-eof/fs/nfs/proc.c --- 00-stock/fs/nfs/proc.c 2004-02-17 22:59:29.000000000 -0500 +++ 16-read-eof/fs/nfs/proc.c 2004-02-20 17:21:09.000000000 -0500 @@ -543,11 +543,10 @@ nfs_read_done(struct rpc_task *task) } static void -nfs_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READ], @@ -555,17 +554,7 @@ nfs_proc_read_setup(struct nfs_read_data .rpc_resp = &data->res, .rpc_cred = data->cred, }; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - + /* N.B. Do we need to test? Never called for swapfile inode */ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); @@ -589,11 +578,10 @@ nfs_write_done(struct rpc_task *task) } static void -nfs_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_WRITE], @@ -603,17 +591,7 @@ nfs_proc_write_setup(struct nfs_write_da }; /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = NFS_FILE_SYNC; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; @@ -628,7 +606,7 @@ nfs_proc_write_setup(struct nfs_write_da } static void -nfs_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs_proc_commit_setup(struct nfs_write_data *data, int how) { BUG(); } diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/read.c 16-read-eof/fs/nfs/read.c --- 00-stock/fs/nfs/read.c 2004-02-17 22:57:11.000000000 -0500 +++ 16-read-eof/fs/nfs/read.c 2004-02-20 17:44:11.000000000 -0500 @@ -154,77 +154,209 @@ nfs_readpage_async(struct file *file, st return 0; } +static void +nfs_readpage_release(struct nfs_page *req) +{ + unlock_page(req->wb_page); + + nfs_clear_request(req); + nfs_release_request(req); + nfs_unlock_request(req); + + dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", + req->wb_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_inode), + req->wb_bytes, + (long long)req_offset(req)); +} + /* * Set up the NFS read request struct */ -static void -nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data) +static inline void +nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { struct inode *inode; - struct nfs_page *req; - struct page **pages; - unsigned int count; - pages = data->pagevec; - count = 0; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); - *pages++ = req->wb_page; - count += req->wb_bytes; - } - req = nfs_list_entry(data->pages.next); data->inode = inode = req->wb_inode; data->cred = req->wb_cred; - NFS_PROTO(inode)->read_setup(data, count); + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + + NFS_PROTO(inode)->read_setup(data); - dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu.\n", + dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), count, - (unsigned long long)req_offset(req)); + data->args.offset); } static void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; - struct page *page; while (!list_empty(head)) { req = nfs_list_entry(head->next); - page = req->wb_page; nfs_list_remove_request(req); - SetPageError(page); - unlock_page(page); - nfs_clear_request(req); - nfs_release_request(req); - nfs_unlock_request(req); + SetPageError(req->wb_page); + nfs_readpage_release(req); + } +} + +/* + * Start an async read operation + */ +static inline void +nfs_execute_read(struct nfs_read_data *data) +{ + struct rpc_clnt *clnt = NFS_CLIENT(data->inode); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + rpc_clnt_sigunmask(clnt, &oldset); +} + +/* + * Currently we assume we're reading the whole page + */ +static unsigned int +nfs_read_count(struct nfs_page *req, unsigned rsize) +{ + loff_t i_size = i_size_read(req->wb_inode); + unsigned long end_index = (i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + unsigned long index = req->wb_page->index + 1; + + if (index < end_index) + return PAGE_CACHE_SIZE / rsize; + + if (index == end_index) { + unsigned bytes = i_size & (PAGE_CACHE_SIZE - 1); + if (bytes) + return (bytes + rsize - 1) / rsize; + else + return PAGE_CACHE_SIZE / rsize; + } + return 0; +} + +/* + * Generate multiple requests to fill a single page. + * + * We optimize to reduce the number of read operations on the wire. If we + * detect that we're reading a page, or an area of a page, that is past the + * end of file, we do not generate NFS read operations but just clear the + * parts of the page that would have come back zero from the server anyway. + * + * We rely on the cached value of i_size to make this determination; another + * client can fill pages on the server past our cached end-of-file, but we + * won't see the new data until our attribute cache is updated. This is more + * or less conventional NFS client behavior. + */ +static int +nfs_pagein_multi(struct list_head *head, struct inode *inode) +{ + unsigned int requests, offset, rsize = NFS_SERVER(inode)->rsize; + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_read_data *data; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + requests = nfs_read_count(req, rsize); + while (requests--) { + data = nfs_readdata_alloc(); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + } + + atomic_inc(&req->wb_complete); + + offset = 0; + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->req = req; + data->pagevec[0] = page; + atomic_inc(&req->wb_complete); + + nfs_read_rpcsetup(req, data, rsize, offset); + + nfs_execute_read(data); + + offset += rsize; + } + + if (offset < PAGE_CACHE_SIZE) + memclear_highpage_flush(page, offset, + PAGE_CACHE_SIZE - offset); + + if (atomic_dec_and_test(&req->wb_complete)) { + if (!PageError(page)) + SetPageUptodate(page); + nfs_readpage_release(req); + } + + return 0; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del_init(&data->pages); + nfs_readdata_free(data); } + SetPageError(page); + nfs_readpage_release(req); + return -ENOMEM; } static int nfs_pagein_one(struct list_head *head, struct inode *inode) { - struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_page *req; + struct page **pages; struct nfs_read_data *data; - sigset_t oldset; + unsigned int count; + + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(head, inode); data = nfs_readdata_alloc(); if (!data) goto out_bad; - nfs_read_rpcsetup(head, data); + pages = data->pagevec; + count = 0; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + *pages++ = req->wb_page; + count += req->wb_bytes; + } + req = nfs_list_entry(data->pages.next); - /* Start the async call */ - rpc_clnt_sigmask(clnt, &oldset); - lock_kernel(); - rpc_execute(&data->task); - unlock_kernel(); - rpc_clnt_sigunmask(clnt, &oldset); + nfs_read_rpcsetup(req, data, count, 0); + + nfs_execute_read(data); return 0; out_bad: nfs_async_read_error(head); @@ -254,54 +386,87 @@ nfs_pagein_list(struct list_head *head, } /* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). + * Handle a read reply that fills part of a page. */ -void -nfs_readpage_result(struct rpc_task *task) +static void +nfs_readpage_result_partial(struct nfs_read_data *data, int status) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; - unsigned int count = data->res.count; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + + if (status >= 0) { + unsigned int request = data->args.count; + unsigned int result = data->res.count; + + if (result < request) { + memclear_highpage_flush(page, + data->args.pgbase + result, + request - result); + if (!data->res.eof) + SetPageError(page); + } + } else + SetPageError(page); - dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, task->tk_status); + if (atomic_dec_and_test(&req->wb_complete)) { + if (!PageError(page)) + SetPageUptodate(page); + nfs_readpage_release(req); + } +} + +/* + * Handle a read reply that fills one or more pages at once. + */ +static void +nfs_readpage_result_full(struct nfs_read_data *data, int status) +{ + unsigned int count = data->res.count; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); - if (task->tk_status >= 0) { + if (status >= 0) { if (count < PAGE_CACHE_SIZE) { memclear_highpage_flush(page, req->wb_pgbase + count, req->wb_bytes - count); - + if (!data->res.eof) + SetPageError(page); count = 0; } else count -= PAGE_CACHE_SIZE; SetPageUptodate(page); } else SetPageError(page); - unlock_page(page); - - dprintk("NFS: read (%s/%Ld %d@%Ld)\n", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), - req->wb_bytes, - (long long)req_offset(req)); - nfs_clear_request(req); - nfs_release_request(req); - nfs_unlock_request(req); + nfs_readpage_release(req); } } /* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +void +nfs_readpage_result(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + int status = task->tk_status; + + dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", + task->tk_pid, status); + + if (NFS_SERVER(data->inode)->rsize >= PAGE_CACHE_SIZE) + nfs_readpage_result_full(data, status); + else + nfs_readpage_result_partial(data, status); +} + +/* * Read a page over NFS. - * We read the page synchronously in the following cases: - * - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way - * around this by creating several consecutive read requests, but - * that's hardly worth it. + * We read the page synchronously in the following case: * - The error flag is set for this page. This happens only when a * previous async read operation failed. */ @@ -324,7 +489,7 @@ nfs_readpage(struct file *file, struct p if (error) goto out_error; - if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { + if (!PageError(page)) { error = nfs_readpage_async(file, inode, page); goto out; } @@ -346,13 +511,6 @@ struct nfs_readdesc { }; static int -readpage_sync_filler(void *data, struct page *page) -{ - struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - return nfs_readpage_sync(desc->filp, page->mapping->host, page); -} - -static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; @@ -380,14 +538,16 @@ nfs_readpages(struct file *filp, struct .filp = filp, .head = &head, }; - struct nfs_server *server = NFS_SERVER(mapping->host); - int is_sync = server->rsize < PAGE_CACHE_SIZE; + struct inode *inode = mapping->host; + struct nfs_server *server = NFS_SERVER(inode); int ret; - ret = read_cache_pages(mapping, pages, - is_sync ? readpage_sync_filler : - readpage_async_filler, - &desc); + dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + nr_pages); + + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/fs/nfs/write.c 16-read-eof/fs/nfs/write.c --- 00-stock/fs/nfs/write.c 2004-02-17 22:58:51.000000000 -0500 +++ 16-read-eof/fs/nfs/write.c 2004-02-20 17:42:41.000000000 -0500 @@ -258,8 +258,7 @@ nfs_writepage(struct page *page, struct goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) && - inode_referenced) { + if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) err = 0; @@ -682,11 +681,7 @@ nfs_updatepage(struct file *file, struct dentry->d_parent->d_name.name, dentry->d_name.name, count, (long long)(page_offset(page) +offset)); - /* - * If wsize is smaller than page size, update and write - * page synchronously. - */ - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) { + if (IS_SYNC(inode)) { status = nfs_writepage_sync(file, inode, page, offset, count); if (status > 0) { if (offset == 0 && status == PAGE_CACHE_SIZE) @@ -736,43 +731,145 @@ done: return status; } +static void +nfs_writepage_release(struct nfs_page *req) +{ + end_page_writeback(req->wb_page); + + if (!PageError(req->wb_page)) { + if (NFS_NEED_RESCHED(req)) { + nfs_mark_request_dirty(req); + goto out; + } else if (NFS_NEED_COMMIT(req)) { + nfs_mark_request_commit(req); + goto out; + } + } + nfs_inode_remove_request(req); + +out: + nfs_clear_commit(req); + nfs_clear_reschedule(req); + nfs_unlock_request(req); +} + /* * Set up the argument/result storage required for the RPC call. */ -static void -nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) +static inline void +nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, + unsigned int count, unsigned int offset, int how) { struct rpc_task *task = &data->task; struct inode *inode; - struct nfs_page *req; - struct page **pages; - unsigned int count; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ - pages = data->pagevec; - count = 0; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); - SetPageWriteback(req->wb_page); - *pages++ = req->wb_page; - count += req->wb_bytes; - } - req = nfs_list_entry(data->pages.next); data->inode = inode = req->wb_inode; data->cred = req->wb_cred; - NFS_PROTO(inode)->write_setup(data, count, how); + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.count = count; + data->args.pages = data->pagevec; + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.verf = &data->verf; + + NFS_PROTO(inode)->write_setup(data, how); dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n", task->tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), count, - (unsigned long long)req_offset(req)); + data->args.offset); +} + +static inline void +nfs_execute_write(struct nfs_write_data *data) +{ + struct rpc_clnt *clnt = NFS_CLIENT(data->inode); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + rpc_clnt_sigunmask(clnt, &oldset); +} + +/* + * Compute how many wsize write operations it will take to + * get our write request onto the server. + */ +static inline unsigned int +nfs_write_count(struct nfs_page *req, unsigned wsize) +{ + return (req->wb_bytes + wsize - 1) / wsize; +} + +/* + * Generate multiple small requests to write out a single + * contiguous dirty area on one page. + */ +static int +nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_write_data *data; + unsigned int requests, offset, wsize = NFS_SERVER(inode)->wsize; + int remaining = req->wb_bytes; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + requests = nfs_write_count(req, wsize); + while (requests--) { + data = nfs_writedata_alloc(); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + } + + atomic_inc(&req->wb_complete); + SetPageWriteback(req->wb_page); + + offset = 0; + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_write_data, pages); + list_del_init(&data->pages); + + data->req = req; + data->pagevec[0] = page; + atomic_inc(&req->wb_complete); + + nfs_write_rpcsetup(req, data, (remaining < wsize ? + remaining : wsize), offset, how); + + nfs_execute_write(data); + + offset += wsize; + remaining -= wsize; + } + + if (atomic_dec_and_test(&req->wb_complete)) + nfs_writepage_release(req); + + return 0; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_write_data, pages); + list_del_init(&data->pages); + nfs_writedata_free(data); + } + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + return -ENOMEM; } /* @@ -786,22 +883,34 @@ nfs_write_rpcsetup(struct list_head *hea static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) { - struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_page *req; + struct page **pages; struct nfs_write_data *data; - sigset_t oldset; + unsigned int count; + + if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(head, inode, how); data = nfs_writedata_alloc(); if (!data) goto out_bad; + pages = data->pagevec; + count = 0; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + SetPageWriteback(req->wb_page); + *pages++ = req->wb_page; + count += req->wb_bytes; + } + req = nfs_list_entry(data->pages.next); + /* Set up the argument struct */ - nfs_write_rpcsetup(head, data, how); + nfs_write_rpcsetup(req, data, count, 0, how); - rpc_clnt_sigmask(clnt, &oldset); - lock_kernel(); - rpc_execute(&data->task); - unlock_kernel(); - rpc_clnt_sigunmask(clnt, &oldset); + nfs_execute_write(data); return 0; out_bad: while (!list_empty(head)) { @@ -840,6 +949,100 @@ nfs_flush_list(struct list_head *head, i return error; } +/* + * Handle a write reply that flushed part of a page. + */ +static void +nfs_writeback_done_partial(struct nfs_write_data *data, int status) +{ + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", + req->wb_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_inode), + req->wb_bytes, + (long long)req_offset(req)); + + if (status < 0) { + ClearPageUptodate(page); + SetPageError(page); + if (req->wb_file) + req->wb_file->f_error = status; + dprintk(", error = %d\n", status); + } else { +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (data->verf.committed < NFS_FILE_SYNC) { + if (!NFS_NEED_COMMIT(req)) { + nfs_defer_commit(req); + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + nfs_defer_reschedule(req); + dprintk(" server reboot detected\n"); + } + } else +#endif + dprintk(" OK\n"); + } + + if (atomic_dec_and_test(&req->wb_complete)) + nfs_writepage_release(req); +} + +/* + * Handle a write reply that flushes a whole page. + * + * FIXME: There is an inherent race with invalidate_inode_pages and + * writebacks since the page->count is kept > 1 for as long + * as the page has a write request pending. + */ +static void +nfs_writeback_done_full(struct nfs_write_data *data, int status) +{ + struct nfs_page *req; + struct page *page; + + /* Update attributes as result of writeback. */ + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", + req->wb_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_inode), + req->wb_bytes, + (long long)req_offset(req)); + + if (status < 0) { + ClearPageUptodate(page); + SetPageError(page); + if (req->wb_file) + req->wb_file->f_error = status; + end_page_writeback(page); + nfs_inode_remove_request(req); + dprintk(", error = %d\n", status); + goto next; + } + end_page_writeback(page); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + dprintk(" marked for commit\n"); +#else + nfs_inode_remove_request(req); +#endif + next: + nfs_unlock_request(req); + } +} /* * This function is called when the WRITE call is complete. @@ -850,8 +1053,6 @@ nfs_writeback_done(struct rpc_task *task struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; - struct nfs_page *req; - struct page *page; dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); @@ -890,50 +1091,10 @@ nfs_writeback_done(struct rpc_task *task } #endif - /* - * Update attributes as result of writeback. - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. - */ - while (!list_empty(&data->pages)) { - req = nfs_list_entry(data->pages.next); - nfs_list_remove_request(req); - page = req->wb_page; - - dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), - req->wb_bytes, - (long long)req_offset(req)); - - if (task->tk_status < 0) { - ClearPageUptodate(page); - SetPageError(page); - if (req->wb_file) - req->wb_file->f_error = task->tk_status; - end_page_writeback(page); - nfs_inode_remove_request(req); - dprintk(", error = %d\n", task->tk_status); - goto next; - } - end_page_writeback(page); - -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (argp->stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { - nfs_inode_remove_request(req); - dprintk(" OK\n"); - goto next; - } - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); - dprintk(" marked for commit\n"); -#else - nfs_inode_remove_request(req); -#endif - next: - nfs_unlock_request(req); - } + if (NFS_SERVER(data->inode)->wsize >= PAGE_CACHE_SIZE) + nfs_writeback_done_full(data, task->tk_status); + else + nfs_writeback_done_partial(data, task->tk_status); } @@ -941,7 +1102,7 @@ nfs_writeback_done(struct rpc_task *task /* * Set up the argument/result storage required for the RPC call. */ -static void +static inline void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; @@ -971,7 +1132,14 @@ nfs_commit_rpcsetup(struct list_head *he data->inode = inode; data->cred = first->wb_cred; - NFS_PROTO(inode)->commit_setup(data, start, len, how); + data->args.fh = NFS_FH(data->inode); + data->args.offset = start; + data->args.count = len; + data->res.count = len; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + + NFS_PROTO(inode)->commit_setup(data, how); dprintk("NFS: %4d initiated commit call\n", task->tk_pid); } @@ -982,10 +1150,8 @@ nfs_commit_rpcsetup(struct list_head *he int nfs_commit_list(struct list_head *head, int how) { - struct rpc_clnt *clnt; struct nfs_write_data *data; struct nfs_page *req; - sigset_t oldset; data = nfs_commit_alloc(); @@ -994,13 +1160,8 @@ nfs_commit_list(struct list_head *head, /* Set up the argument struct */ nfs_commit_rpcsetup(head, data, how); - clnt = NFS_CLIENT(data->inode); - rpc_clnt_sigmask(clnt, &oldset); - lock_kernel(); - rpc_execute(&data->task); - unlock_kernel(); - rpc_clnt_sigunmask(clnt, &oldset); + nfs_execute_write(data); return 0; out_bad: while (!list_empty(head)) { diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/include/linux/nfs_page.h 16-read-eof/include/linux/nfs_page.h --- 00-stock/include/linux/nfs_page.h 2004-02-17 22:57:15.000000000 -0500 +++ 16-read-eof/include/linux/nfs_page.h 2004-02-20 17:26:40.000000000 -0500 @@ -17,10 +17,14 @@ #include #include +#include + /* * Valid flags for a dirty buffer */ #define PG_BUSY 0 +#define PG_NEED_COMMIT 1 +#define PG_NEED_RESCHED 2 struct nfs_page { struct list_head wb_list, /* Defines state of page: */ @@ -31,6 +35,7 @@ struct nfs_page { struct rpc_cred *wb_cred; struct nfs4_state *wb_state; struct page *wb_page; /* page to read in/write out */ + atomic_t wb_complete; /* i/os we're waiting for */ wait_queue_head_t wb_wait; /* wait queue */ unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ @@ -42,6 +47,8 @@ struct nfs_page { }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) +#define NFS_NEED_COMMIT(req) (test_bit(PG_NEED_COMMIT,&(req)->wb_flags)) +#define NFS_NEED_RESCHED(req) (test_bit(PG_NEED_RESCHED,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct file *, struct inode *, struct page *, @@ -115,6 +122,38 @@ nfs_list_remove_request(struct nfs_page req->wb_list_head = NULL; } +static inline int +nfs_defer_commit(struct nfs_page *req) +{ + if (test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) + return 0; + return 1; +} + +static inline void +nfs_clear_commit(struct nfs_page *req) +{ + smp_mb__before_clear_bit(); + clear_bit(PG_NEED_COMMIT, &req->wb_flags); + smp_mb__after_clear_bit(); +} + +static inline int +nfs_defer_reschedule(struct nfs_page *req) +{ + if (test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags)) + return 0; + return 1; +} + +static inline void +nfs_clear_reschedule(struct nfs_page *req) +{ + smp_mb__before_clear_bit(); + clear_bit(PG_NEED_RESCHED, &req->wb_flags); + smp_mb__after_clear_bit(); +} + static inline struct nfs_page * nfs_list_entry(struct list_head *head) { diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/include/linux/nfs_xdr.h 16-read-eof/include/linux/nfs_xdr.h --- 00-stock/include/linux/nfs_xdr.h 2004-02-17 22:59:14.000000000 -0500 +++ 16-read-eof/include/linux/nfs_xdr.h 2004-02-20 17:26:40.000000000 -0500 @@ -656,6 +656,8 @@ struct nfs4_compound { #endif /* CONFIG_NFS_V4 */ +struct nfs_page; + struct nfs_read_data { int flags; struct rpc_task task; @@ -664,6 +666,7 @@ struct nfs_read_data { fl_owner_t lockowner; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ + struct nfs_page *req; /* multi ops per nfs_page */ struct page *pagevec[NFS_READ_MAXIOV]; struct nfs_readargs args; struct nfs_readres res; @@ -681,6 +684,7 @@ struct nfs_write_data { struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ + struct nfs_page *req; /* multi ops per nfs_page */ struct page *pagevec[NFS_WRITE_MAXIOV]; struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ @@ -737,9 +741,9 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); - void (*read_setup) (struct nfs_read_data *, unsigned int count); - void (*write_setup) (struct nfs_write_data *, unsigned int count, int how); - void (*commit_setup) (struct nfs_write_data *, u64 start, u32 len, int how); + void (*read_setup) (struct nfs_read_data *); + void (*write_setup) (struct nfs_write_data *, int how); + void (*commit_setup) (struct nfs_write_data *, int how); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); void (*request_init)(struct nfs_page *, struct file *); diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/include/linux/sunrpc/debug.h 16-read-eof/include/linux/sunrpc/debug.h --- 00-stock/include/linux/sunrpc/debug.h 2004-02-17 22:57:30.000000000 -0500 +++ 16-read-eof/include/linux/sunrpc/debug.h 2004-02-20 17:18:35.000000000 -0500 @@ -92,6 +92,8 @@ enum { CTL_NFSDEBUG, CTL_NFSDDEBUG, CTL_NLMDEBUG, + CTL_SLOTTABLE_UDP, + CTL_SLOTTABLE_TCP, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/include/linux/sunrpc/xprt.h 16-read-eof/include/linux/sunrpc/xprt.h --- 00-stock/include/linux/sunrpc/xprt.h 2004-02-17 22:58:49.000000000 -0500 +++ 16-read-eof/include/linux/sunrpc/xprt.h 2004-02-20 17:18:35.000000000 -0500 @@ -28,16 +28,18 @@ * * Upper procedures may check whether a request would block waiting for * a free RPC slot by using the RPC_CONGESTED() macro. - * - * Note: on machines with low memory we should probably use a smaller - * MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment - * reassembly will frequently run out of memory. - */ -#define RPC_MAXCONG (16) -#define RPC_MAXREQS RPC_MAXCONG -#define RPC_CWNDSCALE (256) -#define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) + */ +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + +#define RPC_MIN_SLOT_TABLE (2U) +#define RPC_DEF_SLOT_TABLE (16U) +#define RPC_MAX_SLOT_TABLE (128U) + +#define RPC_CWNDSHIFT (8U) +#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) #define RPC_INITCWND RPC_CWNDSCALE +#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /* Default timeout values */ @@ -147,7 +149,7 @@ struct rpc_xprt { struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct rpc_rqst * free; /* free slots */ - struct rpc_rqst slot[RPC_MAXREQS]; + unsigned int max_reqs; /* total slots */ unsigned long sockstate; /* Socket state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/net/sunrpc/auth_unix.c 16-read-eof/net/sunrpc/auth_unix.c --- 00-stock/net/sunrpc/auth_unix.c 2004-02-17 22:58:26.000000000 -0500 +++ 16-read-eof/net/sunrpc/auth_unix.c 2004-02-20 17:18:00.000000000 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include #define NFS_NGROUPS 16 @@ -149,19 +150,14 @@ unx_marshal(struct rpc_task *task, u32 * struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; u32 *base, *hold; - int i, n; + int i; *p++ = htonl(RPC_AUTH_UNIX); base = p++; *p++ = htonl(jiffies/HZ); - /* - * Copy the UTS nodename captured when the client was created. - */ - n = clnt->cl_nodelen; - *p++ = htonl(n); - memcpy(p, clnt->cl_nodename, n); - p += (n + 3) >> 2; + /* Copy the UTS nodename captured when the client was created */ + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); /* Note: we don't use real uid if it involves raising privilege */ if (ruid && cred->uc_puid != 0 && cred->uc_pgid != 0) { diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/net/sunrpc/clnt.c 16-read-eof/net/sunrpc/clnt.c --- 00-stock/net/sunrpc/clnt.c 2004-02-17 22:57:29.000000000 -0500 +++ 16-read-eof/net/sunrpc/clnt.c 2004-02-20 17:18:00.000000000 -0500 @@ -611,9 +611,6 @@ call_encode(struct rpc_task *task) rcvbuf->page_len = 0; rcvbuf->len = bufsiz; - /* Zero buffer so we have automatic zero-padding of opaque & string */ - memset(task->tk_buffer, 0, bufsiz); - /* Encode header and provided arguments */ encode = task->tk_msg.rpc_proc->p_encode; if (!(p = call_header(task))) { diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/net/sunrpc/sunrpc_syms.c 16-read-eof/net/sunrpc/sunrpc_syms.c --- 00-stock/net/sunrpc/sunrpc_syms.c 2004-02-17 22:59:19.000000000 -0500 +++ 16-read-eof/net/sunrpc/sunrpc_syms.c 2004-02-20 17:18:35.000000000 -0500 @@ -63,6 +63,8 @@ EXPORT_SYMBOL(rpc_mkpipe); EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_destroy); EXPORT_SYMBOL(xprt_set_timeout); +EXPORT_SYMBOL(xprt_udp_slot_table_entries); +EXPORT_SYMBOL(xprt_tcp_slot_table_entries); /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/net/sunrpc/sysctl.c 16-read-eof/net/sunrpc/sysctl.c --- 00-stock/net/sunrpc/sysctl.c 2004-02-17 22:57:14.000000000 -0500 +++ 16-read-eof/net/sunrpc/sysctl.c 2004-02-20 17:18:35.000000000 -0500 @@ -1,7 +1,7 @@ /* * linux/net/sunrpc/sysctl.c * - * Sysctl interface to sunrpc module. This is for debugging only now. + * Sysctl interface to sunrpc module. * * I would prefer to register the sunrpc table below sys/net, but that's * impossible at the moment. @@ -19,6 +19,7 @@ #include #include #include +#include /* * Declare the debug flags here @@ -117,6 +118,9 @@ done: return 0; } +static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; + static ctl_table debug_table[] = { { .ctl_name = CTL_RPCDEBUG, @@ -150,6 +154,28 @@ static ctl_table debug_table[] = { .mode = 0644, .proc_handler = &proc_dodebug }, + { + .ctl_name = CTL_SLOTTABLE_UDP, + .procname = "udp_slot_table_entries", + .data = &xprt_udp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_SLOTTABLE_TCP, + .procname = "tcp_slot_table_entries", + .data = &xprt_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, { .ctl_name = 0 } }; diff -X /home/cel/src/linux/dont-diff -Naurp 00-stock/net/sunrpc/xprt.c 16-read-eof/net/sunrpc/xprt.c --- 00-stock/net/sunrpc/xprt.c 2004-02-17 22:58:42.000000000 -0500 +++ 16-read-eof/net/sunrpc/xprt.c 2004-02-20 17:18:35.000000000 -0500 @@ -339,8 +339,8 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, /* The (cwnd >> 1) term makes sure * the result gets rounded properly. */ cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; - if (cwnd > RPC_MAXCWND) - cwnd = RPC_MAXCWND; + if (cwnd > RPC_MAXCWND(xprt)) + cwnd = RPC_MAXCWND(xprt); __xprt_lock_write_next(xprt); } else if (result == -ETIMEDOUT) { cwnd >>= 1; @@ -1427,6 +1427,9 @@ xprt_set_timeout(struct rpc_timeout *to, to->to_exponential = 0; } +unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE << 2; + /* * Initialize an RPC client */ @@ -1434,21 +1437,28 @@ static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { struct rpc_xprt *xprt; + unsigned int entries, xprt_size; struct rpc_rqst *req; int i; dprintk("RPC: setting up %s transport...\n", proto == IPPROTO_UDP? "UDP" : "TCP"); - if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) + entries = (proto == IPPROTO_TCP)? + xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; + xprt_size = sizeof(struct rpc_xprt) + sizeof(struct rpc_rqst) * entries; + + if ((xprt = kmalloc(xprt_size, GFP_KERNEL)) == NULL) return NULL; - memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ + memset(xprt, 0, xprt_size); + xprt->free = (struct rpc_rqst *)(xprt + 1); + xprt->max_reqs = entries; xprt->addr = *ap; xprt->prot = proto; xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; if (xprt->stream) { - xprt->cwnd = RPC_MAXCWND; + xprt->cwnd = RPC_MAXCWND(xprt); xprt->nocong = 1; } else xprt->cwnd = RPC_INITCWND; @@ -1476,16 +1486,18 @@ xprt_setup(int proto, struct sockaddr_in INIT_RPC_WAITQ(&xprt->backlog, "xprt_backlog"); /* initialize free list */ - for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++) + entries--; + req = xprt->free; + for (i = 0; i < entries; i++, req++) req->rq_next = req + 1; req->rq_next = NULL; - xprt->free = xprt->slot; /* Check whether we want to use a reserved port */ xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; - dprintk("RPC: created transport %p\n", xprt); - + dprintk("RPC: created transport %p with %u slots\n", xprt, + xprt->max_reqs); + return xprt; } @@ -1566,11 +1578,11 @@ xprt_sock_setbufsize(struct rpc_xprt *xp return; if (xprt->rcvsize) { sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = xprt->rcvsize * RPC_MAXCONG * 2; + sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; } if (xprt->sndsize) { sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = xprt->sndsize * RPC_MAXCONG * 2; + sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; sk->sk_write_space(sk); } }