nfsd: merge cookie collision fixes from ext4 tree

These changes fix readdir loops on ext4 filesystems with dir_index
turned on.  I'm pulling them from Ted's tree as I'd like to give them
some extra nfsd testing, and expect to be applying (potentially
conflicting) patches to the same code before the next merge window.

From the nfs-ext4-premerge branch of

	git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
This commit is contained in:
J. Bruce Fields 2012-03-19 12:34:39 -04:00
commit 62b9510cb3
6 changed files with 205 additions and 58 deletions

View File

@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
static int ext4_readdir(struct file *, void *, filldir_t);
static int ext4_dx_readdir(struct file *filp,
void *dirent, filldir_t filldir);
static int ext4_release_dir(struct inode *inode,
struct file *filp);
const struct file_operations ext4_dir_operations = {
.llseek = ext4_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
.release = ext4_release_dir,
};
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext4_filetype_table[filetype]);
}
/**
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which chould potentially get coverted to use htree
* indexing).
*
* Return 1 if it is a dx dir, 0 if not
*/
static int is_dx_dir(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
((inode->i_size >> sb->s_blocksize_bits) == 1)))
return 1;
return 0;
}
/*
* Return 0 if the directory entry is OK, and 1 if there is a problem
*
@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
unsigned int offset;
int i, stored;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
int ret = 0;
int dir_has_error = 0;
sb = inode->i_sb;
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
((inode->i_size >> sb->s_blocksize_bits) == 1))) {
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
ret = err;
@ -254,22 +253,134 @@ out:
return ret;
}
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
return is_compat_task();
#else
return (BITS_PER_LONG == 32);
#endif
}
/*
* These functions convert from the major/minor hash to an f_pos
* value.
* value for dx directories
*
* Currently we only use major hash numer. This is unfortunate, but
* on 32-bit machines, the same VFS interface is used for lseek and
* llseek, so if we use the 64 bit offset, then the 32-bit versions of
* lseek/telldir/seekdir will blow out spectacularly, and from within
* the ext2 low-level routine, we don't know if we're being called by
* a 64-bit version of the system call or the 32-bit version of the
* system call. Worse yet, NFSv2 only allows for a 32-bit readdir
* cookie. Sigh.
* Upper layer (for example NFS) should specify FMODE_32BITHASH or
* FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
* directly on both 32-bit and 64-bit nodes, under such case, neither
* FMODE_32BITHASH nor FMODE_64BITHASH is specified.
*/
#define hash2pos(major, minor) (major >> 1)
#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
#define pos2min_hash(pos) (0)
static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
{
if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return major >> 1;
else
return ((__u64)(major >> 1) << 32) | (__u64)minor;
}
static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return (pos << 1) & 0xffffffff;
else
return ((pos >> 32) << 1) & 0xffffffff;
}
static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return 0;
else
return pos & 0xffffffff;
}
/*
* Return 32- or 64-bit end-of-file for dx directories
*/
static inline loff_t ext4_get_htree_eof(struct file *filp)
{
if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return EXT4_HTREE_EOF_32BIT;
else
return EXT4_HTREE_EOF_64BIT;
}
/*
* ext4_dir_llseek() based on generic_file_llseek() to handle both
* non-htree and htree directories, where the "offset" is in terms
* of the filename hash value instead of the byte offset.
*
* NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
* will be invalid once the directory was converted into a dx directory
*/
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;
loff_t ret = -EINVAL;
int dx_dir = is_dx_dir(inode);
mutex_lock(&inode->i_mutex);
/* NOTE: relative offsets with dx directories might not work
* as expected, as it is difficult to figure out the
* correct offset between dx hashes */
switch (origin) {
case SEEK_END:
if (unlikely(offset > 0))
goto out_err; /* not supported for directories */
/* so only negative offsets are left, does that have a
* meaning for directories at all? */
if (dx_dir)
offset += ext4_get_htree_eof(file);
else
offset += inode->i_size;
break;
case SEEK_CUR:
/*
* Here we special-case the lseek(fd, 0, SEEK_CUR)
* position-querying operation. Avoid rewriting the "same"
* f_pos value back to the file because a concurrent read(),
* write() or lseek() might have altered it
*/
if (offset == 0) {
offset = file->f_pos;
goto out_ok;
}
offset += file->f_pos;
break;
}
if (unlikely(offset < 0))
goto out_err;
if (!dx_dir) {
if (offset > inode->i_sb->s_maxbytes)
goto out_err;
} else if (offset > ext4_get_htree_eof(file))
goto out_err;
/* Special lock needed here? */
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
}
out_ok:
ret = offset;
out_err:
mutex_unlock(&inode->i_mutex);
return ret;
}
/*
* This structure holds the nodes of the red-black tree used to store
@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}
static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
loff_t pos)
{
struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
p->curr_hash = pos2maj_hash(pos);
p->curr_minor_hash = pos2min_hash(pos);
p->curr_hash = pos2maj_hash(filp, pos);
p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
}
@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
"null fname?!?\n");
return 0;
}
curr_pos = hash2pos(fname->hash, fname->minor_hash);
curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
fname->name_len, curr_pos,
@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
int ret;
if (!info) {
info = ext4_htree_create_dir_info(filp->f_pos);
info = ext4_htree_create_dir_info(filp, filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
}
if (filp->f_pos == EXT4_HTREE_EOF)
if (filp->f_pos == ext4_get_htree_eof(filp))
return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */
@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
free_rb_tree_fname(&info->root);
info->curr_node = NULL;
info->extra_fname = NULL;
info->curr_hash = pos2maj_hash(filp->f_pos);
info->curr_minor_hash = pos2min_hash(filp->f_pos);
info->curr_hash = pos2maj_hash(filp, filp->f_pos);
info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
}
/*
@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
if (ret < 0)
return ret;
if (ret == 0) {
filp->f_pos = EXT4_HTREE_EOF;
filp->f_pos = ext4_get_htree_eof(filp);
break;
}
info->curr_node = rb_first(&info->root);
@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
info->curr_minor_hash = fname->minor_hash;
} else {
if (info->next_hash == ~0) {
filp->f_pos = EXT4_HTREE_EOF;
filp->f_pos = ext4_get_htree_eof(filp);
break;
}
info->curr_hash = info->next_hash;
@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
return 0;
}
const struct file_operations ext4_dir_operations = {
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir,
.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
.release = ext4_release_dir,
};

View File

@ -1612,7 +1612,11 @@ struct dx_hash_info
u32 *seed;
};
#define EXT4_HTREE_EOF 0x7fffffff
/* 32 and 64 bit signed EOF for dx directories */
#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
/*
* Control parameters used by ext4_htree_next_block

View File

@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1;
}
hash = hash & ~1;
if (hash == (EXT4_HTREE_EOF << 1))
hash = (EXT4_HTREE_EOF-1) << 1;
if (hash == (EXT4_HTREE_EOF_32BIT << 1))
hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash;
hinfo->minor_hash = minor_hash;
return 0;

View File

@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
/*
* Open an existing file or directory.
* The access argument indicates the type of open (read/write/lock)
* The may_flags argument indicates the type of open (read/write/lock)
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
__be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int access, struct file **filp)
int may_flags, struct file **filp)
{
struct dentry *dentry;
struct inode *inode;
@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
*/
err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* or any access when mandatory locking enabled
*/
err = nfserr_perm;
if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
goto out;
/*
* We must ignore files (but only files) which might have mandatory
@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
if (!inode->i_fop)
goto out;
host_err = nfsd_open_break_lease(inode, access);
host_err = nfsd_open_break_lease(inode, may_flags);
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
if (access & NFSD_MAY_WRITE) {
if (access & NFSD_MAY_READ)
if (may_flags & NFSD_MAY_WRITE) {
if (may_flags & NFSD_MAY_READ)
flags = O_RDWR|O_LARGEFILE;
else
flags = O_WRONLY|O_LARGEFILE;
@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
flags, current_cred());
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
else
host_err = ima_file_check(*filp, access);
else {
host_err = ima_file_check(*filp, may_flags);
if (may_flags & NFSD_MAY_64BIT_COOKIE)
(*filp)->f_mode |= FMODE_64BITHASH;
else
(*filp)->f_mode |= FMODE_32BITHASH;
}
out_nfserr:
err = nfserrno(host_err);
out:
@ -2020,8 +2028,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
__be32 err;
struct file *file;
loff_t offset = *offsetp;
int may_flags = NFSD_MAY_READ;
err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
/* NFSv2 only supports 32 bit cookies */
if (rqstp->rq_vers > 2)
may_flags |= NFSD_MAY_64BIT_COOKIE;
err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;

View File

@ -27,6 +27,8 @@
#define NFSD_MAY_BYPASS_GSS 0x400
#define NFSD_MAY_READ_IF_EXEC 0x800
#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)

View File

@ -92,6 +92,10 @@ struct inodes_stat_t {
/* File is opened using open(.., 3, ..) and is writeable only for ioctls
(specialy hack for floppy.c) */
#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400)
/*
* Don't update ctime and mtime.