nfsd4: allow large readdirs

Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
This commit is contained in:
J. Bruce Fields 2014-01-20 16:37:11 -05:00
parent 32aaa62ede
commit 561f0ed498
3 changed files with 82 additions and 69 deletions

View File

@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount;
if (rlen > PAGE_SIZE)
rlen = PAGE_SIZE;
if (rlen > maxcount)
rlen = maxcount;
return (op_encode_hdr_size + op_encode_verifier_maxsz)
* sizeof(__be32) + rlen;
return (op_encode_hdr_size + op_encode_verifier_maxsz +
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)

View File

@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
}
static __be32
nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
const char *name, int namlen, __be32 **p, int buflen)
nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
const char *name, int namlen)
{
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
out_encode:
nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
cd->rd_bmval,
nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
cd->rd_rqstp, ignore_crossmnt);
out_put:
dput(dentry);
@ -2638,9 +2637,12 @@ out_put:
}
static __be32 *
nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
if (buflen < 6)
__be32 *p;
p = xdr_reserve_space(xdr, 6);
if (!p)
return NULL;
*p++ = htonl(2);
*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
{
struct readdir_cd *ccd = ccdv;
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
int buflen;
__be32 *p = cd->buffer;
__be32 *cookiep;
struct xdr_stream *xdr = cd->xdr;
int start_offset = xdr->buf->len;
int cookie_offset;
int entry_bytes;
__be32 nfserr = nfserr_toosmall;
__be64 wire_offset;
__be32 *p;
/* In nfsv4, "." and ".." never make it onto the wire.. */
if (name && isdotent(name, namlen)) {
@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return 0;
}
if (cd->offset)
xdr_encode_hyper(cd->offset, (u64) offset);
if (cd->cookie_offset) {
wire_offset = cpu_to_be64(offset);
write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
&wire_offset, 8);
}
buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
if (buflen < 0)
p = xdr_reserve_space(xdr, 4);
if (!p)
goto fail;
*p++ = xdr_one; /* mark entry present */
cookiep = p;
cookie_offset = xdr->buf->len;
p = xdr_reserve_space(xdr, 3*4 + namlen);
if (!p)
goto fail;
p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
p = xdr_encode_array(p, name, namlen); /* name length & name */
nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
switch (nfserr) {
case nfs_ok:
break;
@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
*/
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
goto fail;
p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
p = nfsd4_encode_rdattr_error(xdr, nfserr);
if (p == NULL) {
nfserr = nfserr_toosmall;
goto fail;
}
}
cd->buflen -= (p - cd->buffer);
cd->buffer = p;
cd->offset = cookiep;
nfserr = nfserr_toosmall;
entry_bytes = xdr->buf->len - start_offset;
if (entry_bytes > cd->rd_maxcount)
goto fail;
cd->rd_maxcount -= entry_bytes;
cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
return 0;
fail:
xdr_truncate_encode(xdr, start_offset);
cd->common.err = nfserr;
return -EINVAL;
}
@ -3206,10 +3220,11 @@ static __be32
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
{
int maxcount;
int bytes_left;
loff_t offset;
__be64 wire_offset;
struct xdr_stream *xdr = &resp->xdr;
int starting_len = xdr->buf->len;
__be32 *page, *tailbase;
__be32 *p;
if (nfserr)
@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (!p)
return nfserr_resource;
if (resp->xdr.buf->page_len)
return nfserr_resource;
if (!*resp->rqstp->rq_next_page)
return nfserr_resource;
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
WRITE32(0);
WRITE32(0);
resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
- (char *)resp->xdr.buf->head[0].iov_base;
tailbase = p;
maxcount = PAGE_SIZE;
if (maxcount > readdir->rd_maxcount)
maxcount = readdir->rd_maxcount;
/*
* Convert from bytes to words, account for the two words already
* written, make sure to leave two words at the end for the next
* pointer and eof field.
* Number of bytes left for directory entries allowing for the
* final 8 bytes of the readdir and a following failed op:
*/
maxcount = (maxcount >> 2) - 4;
if (maxcount < 0) {
nfserr = nfserr_toosmall;
bytes_left = xdr->buf->buflen - xdr->buf->len
- COMPOUND_ERR_SLACK_SPACE - 8;
if (bytes_left < 0) {
nfserr = nfserr_resource;
goto err_no_verf;
}
maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
/*
* Note the rfc defines rd_maxcount as the size of the
* READDIR4resok structure, which includes the verifier above
* and the 8 bytes encoded at the end of this function:
*/
if (maxcount < 16) {
nfserr = nfserr_toosmall;
goto err_no_verf;
}
maxcount = min_t(int, maxcount-16, bytes_left);
page = page_address(*(resp->rqstp->rq_next_page++));
readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
readdir->buflen = maxcount;
readdir->buffer = page;
readdir->offset = NULL;
readdir->cookie_offset = 0;
offset = readdir->rd_cookie;
nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
&readdir->common, nfsd4_encode_dirent);
if (nfserr == nfs_ok &&
readdir->common.err == nfserr_toosmall &&
readdir->buffer == page)
nfserr = nfserr_toosmall;
xdr->buf->len == starting_len + 8) {
/* nothing encoded; which limit did we hit?: */
if (maxcount - 16 < bytes_left)
/* It was the fault of rd_maxcount: */
nfserr = nfserr_toosmall;
else
/* We ran out of buffer space: */
nfserr = nfserr_resource;
}
if (nfserr)
goto err_no_verf;
if (readdir->offset)
xdr_encode_hyper(readdir->offset, offset);
if (readdir->cookie_offset) {
wire_offset = cpu_to_be64(offset);
write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
&wire_offset, 8);
}
p = readdir->buffer;
p = xdr_reserve_space(xdr, 8);
if (!p) {
WARN_ON_ONCE(1);
goto err_no_verf;
}
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
resp->xdr.buf->page_len = ((char *)p) -
(char*)page_address(*(resp->rqstp->rq_next_page-1));
xdr->buf->len += xdr->buf->page_len;
xdr->iov = xdr->buf->tail;
xdr->page_ptr++;
xdr->buf->buflen -= PAGE_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
resp->xdr.buf->tail[0].iov_base = tailbase;
resp->xdr.buf->tail[0].iov_len = 0;
resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
resp->xdr.end = resp->xdr.p +
(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
return 0;
err_no_verf:

View File

@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */
struct readdir_cd common;
__be32 * buffer;
int buflen;
__be32 * offset;
struct xdr_stream *xdr;
int cookie_offset;
};
struct nfsd4_release_lockowner {