Subject: [PATCH] NFS: getdents(3) hints Category: Scalability enhancement Description: When an application invokes getdents(3) on a directory stored in NFS, the directory cache logic always searches from the beginning of the directory to find the cookie in question. For large directories, this is significant overhead, and means that a single walk through the directory using getdents(3) calls can cost more than O(n) -- up to O(n^2), since every call rescans from the start. This patch adds a page index hint to the directory search algorithm so that getdents(3) can start where it left off, rather than walking the entire directory from the beginning each time it is called. Test-plan: Connectathon, "rm -rf" on a large directory tree; tar and untar. fs/nfs/dir.c | 57 ++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 47 insertions(+), 10 deletions(-) Signed-off-by: Chuck Lever Applies-to: 2.6.8.1 Created: Tue, 17 Aug 2004 23:42:00 -0400 System-ID: Linux climax.citi.umich.edu 2.6.8.1 #1 SMP Tue Aug 17 15:53:50 EDT 2004 i386 diff -X /home/cel/src/linux/dont-diff -Naurp 09-rpc_call_sync/fs/nfs/dir.c 20-nfs_readdir-hint/fs/nfs/dir.c --- 09-rpc_call_sync/fs/nfs/dir.c 2004-08-16 13:23:35.986904000 -0400 +++ 20-nfs_readdir-hint/fs/nfs/dir.c 2004-08-17 13:41:24.197165000 -0400 @@ -104,6 +104,7 @@ nfs_opendir(struct inode *inode, struct /* Call generic open code in order to cache credentials */ if (!res) res = nfs_open(inode, filp); + filp->f_ra.prev_page = 0; unlock_kernel(); return res; } @@ -417,6 +418,7 @@ int uncached_readdir(nfs_readdir_descrip */ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { + int res, pass = 0; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, @@ -424,22 +426,35 @@ static int nfs_readdir(struct file *filp struct nfs_entry my_entry; struct nfs_fh fh; struct nfs_fattr fattr; - long res; lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) { unlock_kernel(); + dfprintk(VFS, 
"NFS: nfs_readdir() revalidation failed, error %d\n", + res); return res; } + dfprintk(VFS, "NFS: nfs_readdir(%s/%s) starting at cookie %Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); + /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * Restore our hint about which page to start searching. If we're + * already starting at page zero, use only one search pass. */ + desc->page_index = filp->f_ra.prev_page; + if (desc->page_index == 0) + pass = 1; + + /* + * filp->f_pos contains the cookie where the last search ended. + * If the cache has meanwhile been zapped, we need to read from the + * last dirent to revalidate f_pos itself. + */ + retry: memset(desc, 0, sizeof(*desc)); desc->file = filp; @@ -454,6 +469,7 @@ static int nfs_readdir(struct file *filp desc->entry = &my_entry; while(!desc->entry->eof) { + filp->f_ra.prev_page = 0; res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { /* This means either end of directory */ @@ -473,21 +489,42 @@ static int nfs_readdir(struct file *filp desc->entry->eof = 0; continue; } - if (res < 0) + if (res < 0) { + /* We didn't find desc->target near the hint, + * so try again from the top of the directory */ + if (!pass) { + dfprintk(VFS, "NFS: nfs_readdir: retrying from the top\n"); + desc->page_index = 0; + desc->entry->eof = 0; + pass = 1; + goto retry; + } + /* We didn't find desc->target at all. */ break; + } res = nfs_do_filldir(desc, dirent, filldir); + if (res == -EAGAIN) { + /* Save a hint so we avoid rescanning old entries the next + * time we're called */ + filp->f_ra.prev_page = (desc->page_index ? 
desc->page_index - 1: 0); + dfprintk(VFS, "NFS: nfs_readdir: saving hint %lu\n", + filp->f_ra.prev_page); + } if (res < 0) { res = 0; break; } } unlock_kernel(); + if (desc->error < 0) - return desc->error; - if (res < 0) - return res; - return 0; + res = desc->error; + if (res > 0) + res = 0; + dfprintk(VFS, "NFS: nfs_readdir(%s/%s) returned %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, res); + return res; } /*