From fe6e9c1f25ac01f848bd084ee0ee62a5a0966ff3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 23 Jun 2008 08:30:55 -0400 Subject: [PATCH 01/12] [PATCH] fix cgroup-inflicted breakage in block_dev.c devcgroup_inode_permission() expects MAY_FOO, not FMODE_FOO; kindly keep your misdesign consistent if you positively have to inflict it on the kernel. Signed-off-by: Al Viro --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 470c10ceb0fb..10d8a0aa871a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) struct gendisk *disk; int ret; int part; + int perm = 0; - ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (file->f_mode & FMODE_READ) + perm |= MAY_READ; + if (file->f_mode & FMODE_WRITE) + perm |= MAY_WRITE; + /* + * hooks: /n/, see "layering violations". + */ + ret = devcgroup_inode_permission(bdev->bd_inode, perm); if (ret != 0) return ret; From 12fd0d3088d27867be68655bcab2b074f2835f60 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:07 -0700 Subject: [PATCH 02/12] [patch for 2.6.26 2/4] vfs: utimensat(): be consistent with utime() for immutable and append-only files This patch fixes utimensat() to make its behavior consistent with that of utime()/utimes() when dealing with files marked immutable and append-only. The current utimensat() implementation also returns EPERM if 'times' is non-NULL and the tv_nsec fields are both UTIME_NOW. For consistency, the (times != NULL && times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) case should be treated like the traditional utimes() case where 'times' is NULL. That is, the call should succeed for a file marked append-only and should give the error EACCES if the file is marked as immutable. The simple way to do this is to set 'times' to NULL if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW). This is also the natural approach, since POSIX.1 semantics consider the times == {{x, UTIME_NOW}, {y, UTIME_NOW}} to be exactly equivalent to the case for times == NULL. (Thanks to Miklos for pointing this out.) Patch 3 in this series relies on the simplification provided by this patch. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/utimes.c b/fs/utimes.c index af059d5cb485..14d3edbb3d7c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -102,6 +102,10 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (error) goto dput_and_out; + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { From 94c70b9ba7e9c1036284e779e2fef5be89021533 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:05 -0700 Subject: [PATCH 03/12] [patch for 2.6.26 1/4] vfs: utimensat(): ignore tv_sec if tv_nsec == UTIME_OMIT or UTIME_NOW The POSIX.1 draft spec for utimensat() says that if a times[n].tv_nsec field is UTIME_OMIT or UTIME_NOW, then the value in the corresponding tv_sec field is ignored. See the last sentence of this para, from the spec: If the tv_nsec field of a timespec structure has the special value UTIME_NOW, the file's relevant timestamp shall be set to the greatest value supported by the file system that is not greater than the current time. If the tv_nsec field has the special value UTIME_OMIT, the file's relevant timestamp shall not be changed. In either case, the tv_sec field shall be ignored. However the current Linux implementation requires the tv_sec value to be zero (or the EINVAL error results). This requirement should be removed. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index 14d3edbb3d7c..d466bc587e6e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -173,14 +173,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ if (utimes) { if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) return -EFAULT; - if ((tstimes[0].tv_nsec == UTIME_OMIT || - tstimes[0].tv_nsec == UTIME_NOW) && - tstimes[0].tv_sec != 0) - return -EINVAL; - if ((tstimes[1].tv_nsec == UTIME_OMIT || - tstimes[1].tv_nsec == UTIME_NOW) && - tstimes[1].tv_sec != 0) - return -EINVAL; /* Nothing to do, we must not even check the path. */ if (tstimes[0].tv_nsec == UTIME_OMIT && From 4cca92264e61a90b43fc4e076cd25b7f4e16dc61 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:08 -0700 Subject: [PATCH 04/12] [patch for 2.6.26 3/4] vfs: utimensat(): fix error checking for {UTIME_NOW,UTIME_OMIT} case The POSIX.1 draft spec for utimensat() says: Only a process with the effective user ID equal to the user ID of the file or with appropriate privileges may use futimens() or utimensat() with a non-null times argument that does not have both tv_nsec fields set to UTIME_NOW and does not have both tv_nsec fields set to UTIME_OMIT. If this condition is violated, then the error EPERM should result. However, the current implementation does not generate EPERM if one tv_nsec field is UTIME_NOW while the other is UTIME_OMIT. It should give this error for that case. This patch: a) Repairs that problem. b) Removes the now unneeded nsec_special() helper function. c) Adds some comments to explain the checks that are being performed. Thanks to Miklos, who provided comments on the previous iteration of this patch. As a result, this version is a little simpler and and its logic is better structured. Miklos suggested an alternative idea, migrating the is_owner_or_cap() checks into fs/attr.c:inode_change_ok() via the use of an ATTR_OWNER_CHECK flag. Maybe we could do that later, but for now I've gone with this version, which is IMO simpler, and can be more easily read as being correct. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index d466bc587e6e..118d1c3241be 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif -static bool nsec_special(long nsec) -{ - return nsec == UTIME_OMIT || nsec == UTIME_NOW; -} - static bool nsec_valid(long nsec) { - if (nsec_special(nsec)) + if (nsec == UTIME_OMIT || nsec == UTIME_NOW) return true; return nsec >= 0 && nsec <= 999999999; @@ -106,7 +101,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags times[1].tv_nsec == UTIME_NOW) times = NULL; - /* Don't worry, the checks are done in inode_change_ok() */ + /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = -EPERM; @@ -128,15 +123,26 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } - /* - * If times is NULL or both times are either UTIME_OMIT or - * UTIME_NOW, then need to check permissions, because - * inode_change_ok() won't do it. - */ - if (!times || (nsec_special(times[0].tv_nsec) && - nsec_special(times[1].tv_nsec))) { + /* + * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT + * cases, we need to make an extra check that is not done by + * inode_change_ok(). + */ + if (((times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_OMIT) + || + (times[0].tv_nsec == UTIME_OMIT && + times[1].tv_nsec == UTIME_NOW)) + && !is_owner_or_cap(inode)) + goto mnt_drop_write_and_out; + } else { + + /* + * If times is NULL (or both times are UTIME_NOW), + * then we need to check permissions, because + * inode_change_ok() won't do it. + */ error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; From c70f84417429f41519be0197a1092a53c2201f47 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:09 -0700 Subject: [PATCH 05/12] [patch for 2.6.26 4/4] vfs: utimensat(): fix write access check for futimens() The POSIX.1 draft spec for futimens()/utimensat() says: Only a process with the effective user ID equal to the user ID of the file, *or with write access to the file*, or with appropriate privileges may use futimens() or utimensat() with a null pointer as the times argument or with both tv_nsec fields set to the special value UTIME_NOW. The important piece here is "with write access to the file", and this matters for futimens(), which deals with an argument that is a file descriptor referring to the file whose timestamps are being updated, The standard is saying that the "writability" check is based on the file permissions, not the access mode with which the file is opened. (This behavior is consistent with the semantics of FreeBSD's futimes().) However, Linux is currently doing the latter -- futimens(fd, times) is a library function implemented as utimensat(fd, NULL, times, 0) and within the utimensat() implementation we have the code: f = fget(dfd); // dfd is 'fd' ... if (f) { if (!(f->f_mode & FMODE_WRITE)) goto mnt_drop_write_and_out; The check should instead be based on the file permissions. Thanks to Miklos for pointing out how to do this check. Miklos also pointed out a simplification that could be made to my first version of this patch, since the checks for the pathname and file descriptor cases can now be conflated. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index 118d1c3241be..b6b664e7145e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -148,14 +148,9 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - if (f) { - if (!(f->f_mode & FMODE_WRITE)) - goto mnt_drop_write_and_out; - } else { - error = vfs_permission(&nd, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; - } + error = permission(inode, MAY_WRITE, NULL); + if (error) + goto mnt_drop_write_and_out; } } mutex_lock(&inode->i_mutex); From c8e7f449b225ee6c87454ac069f0a041035c5140 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 9 Jun 2008 16:40:35 -0700 Subject: [PATCH 06/12] [patch 1/4] vfs: path_{get,put}() cleanups Here are some more places where path_{get,put}() can be used instead of dput()/mntput() pair. Signed-off-by: Jan Blunck Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 11 +++++------ fs/pipe.c | 10 ++++------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index c7e43536c49a..ee1544696e83 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd int result; /* make sure the stuff we saved doesn't go away */ - dget(save.dentry); - mntget(save.mnt); + path_get(&save); result = __link_path_walk(name, nd); if (result == -ESTALE) { /* nd->path had been dropped */ nd->path = save; - dget(nd->path.dentry); - mntget(nd->path.mnt); + path_get(&nd->path); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } @@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->flags = flags; nd->depth = 0; - nd->path.mnt = mntget(mnt); - nd->path.dentry = dget(dentry); + nd->path.dentry = dentry; + nd->path.mnt = mnt; + path_get(&nd->path); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && diff --git a/fs/pipe.c b/fs/pipe.c index ec228bc9f882..700f4e0d9572 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void) void free_write_pipe(struct file *f) { free_pipe_info(f->f_dentry->d_inode); - dput(f->f_path.dentry); - mntput(f->f_path.mnt); + path_put(&f->f_path); put_filp(f); } @@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf) return ERR_PTR(-ENFILE); /* Grab pipe from the writer */ - f->f_path.mnt = mntget(wrf->f_path.mnt); - f->f_path.dentry = dget(wrf->f_path.dentry); + f->f_path = wrf->f_path; + path_get(&wrf->f_path); f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; f->f_pos = 0; @@ -1068,8 +1067,7 @@ int do_pipe(int *fd) err_fdr: put_unused_fd(fdr); err_read_pipe: - dput(fr->f_dentry); - mntput(fr->f_vfsmnt); + path_put(&fr->f_path); put_filp(fr); err_write_pipe: free_write_pipe(fw); From 20d4fdc1a788e4ca0aaf2422772ba668e7e10839 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 9 Jun 2008 16:40:36 -0700 Subject: [PATCH 07/12] [patch 2/4] fs: make struct file arg to d_path const Signed-off-by: Jan Engelhardt Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/dcache.c | 2 +- include/linux/dcache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 3ee588d5f585..c4c9072d810c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1847,7 +1847,7 @@ Elong: * * "buflen" should be positive. Caller holds the dcache_lock. */ -char *d_path(struct path *path, char *buf, int buflen) +char *d_path(const struct path *path, char *buf, int buflen) { char *res; struct path root; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..d982eb89c77d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -300,7 +300,7 @@ extern int d_validate(struct dentry *, struct dentry *); extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); -extern char *d_path(struct path *, char *, int); +extern char *d_path(const struct path *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ From 694a1764d657e0f7a9b139bc7269c8d5f5a2534b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 9 Jun 2008 16:40:37 -0700 Subject: [PATCH 08/12] [patch 3/4] vfs: fix ERR_PTR abuse in generic_readlink generic_readlink calls ERR_PTR for negative and positive values (vfs_readlink returns length of "link"), but it should not (not an errno) and does not need to. Signed-off-by: Marcin Slusarz Cc: Al Viro Cc: Christoph Hellwig Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ee1544696e83..01e67dddcc3d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2856,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; void *cookie; + int res; nd.depth = 0; cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!IS_ERR(cookie)) { - int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); - cookie = ERR_PTR(res); - } - return PTR_ERR(cookie); + if (IS_ERR(cookie)) + return PTR_ERR(cookie); + + res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + return res; } int vfs_follow_link(struct nameidata *nd, const char *link) From f9f48ec72bfc9489a30bc6ddbfcf27d86a8bc651 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 9 Jun 2008 16:40:38 -0700 Subject: [PATCH 09/12] [patch 4/4] flock: remove unused fields from file_lock_operations fl_insert and fl_remove are not used right now in the kernel. Remove them. Signed-off-by: Denis V. Lunev Cc: Matthew Wilcox Cc: Alexander Viro Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/locks.c | 6 ------ include/linux/fs.h | 2 -- 2 files changed, 8 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 11dbf08651b7..dce8c747371c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) /* insert into file's list */ fl->fl_next = *pos; *pos = fl; - - if (fl->fl_ops && fl->fl_ops->fl_insert) - fl->fl_ops->fl_insert(fl); } /* @@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) fl->fl_fasync = NULL; } - if (fl->fl_ops && fl->fl_ops->fl_remove) - fl->fl_ops->fl_remove(fl); - if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index d490779f18d9..7c1080826832 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -894,8 +894,6 @@ static inline int file_check_writeable(struct file *filp) typedef struct files_struct *fl_owner_t; struct file_lock_operations { - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ - void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; From be285c712bbbe5db43e503782fbef2bfeaa345f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Jun 2008 13:28:07 +0200 Subject: [PATCH 10/12] [patch 3/3] vfs: make d_path() consistent across mount operations The path that __d_path() computes can become slightly inconsistent when it races with mount operations: it grabs the vfsmount_lock when traversing mount points but immediately drops it again, only to re-grab it when it reaches the next mount point. The result is that the filename computed is not always consisent, and the file may never have had that name. (This is unlikely, but still possible.) Fix this by grabbing the vfsmount_lock for the whole duration of __d_path(). Signed-off-by: Andreas Gruenbacher Signed-off-by: John Johansen Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c4c9072d810c..2b479de10a0a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1782,6 +1782,7 @@ char *__d_path(const struct path *path, struct path *root, char * end = buffer+buflen; char * retval; + spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); if (!IS_ROOT(dentry) && d_unhashed(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) @@ -1800,14 +1801,11 @@ char *__d_path(const struct path *path, struct path *root, break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { - spin_unlock(&vfsmount_lock); goto global_root; } dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; - spin_unlock(&vfsmount_lock); continue; } parent = dentry->d_parent; @@ -1820,6 +1818,8 @@ char *__d_path(const struct path *path, struct path *root, dentry = parent; } +out: + spin_unlock(&vfsmount_lock); return retval; global_root: @@ -1829,9 +1829,11 @@ global_root: goto Elong; root->mnt = vfsmnt; root->dentry = dentry; - return retval; + goto out; + Elong: - return ERR_PTR(-ENAMETOOLONG); + retval = ERR_PTR(-ENAMETOOLONG); + goto out; } /** From 31f3e0b3a18c6d48196c40a82a3b8c01f4ff6b23 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:52 +0200 Subject: [PATCH 11/12] [patch 1/3] vfs: dcache sparse fixes Fix the following sparse warnings: fs/dcache.c:2183:19: warning: symbol 'filp_cachep' was not declared. Should it be static? fs/dcache.c:115:3: warning: context imbalance in 'dentry_iput' - unexpected unlock fs/dcache.c:188:2: warning: context imbalance in 'dput' - different lock contexts for basic block fs/dcache.c:400:2: warning: context imbalance in 'prune_one_dentry' - different lock contexts for basic block fs/dcache.c:431:22: warning: context imbalance in 'prune_dcache' - different lock contexts for basic block fs/dcache.c:563:2: warning: context imbalance in 'shrink_dcache_sb' - different lock contexts for basic block fs/dcache.c:1385:6: warning: context imbalance in 'd_delete' - wrong count at exit fs/dcache.c:1636:2: warning: context imbalance in '__d_unalias' - unexpected unlock fs/dcache.c:1735:2: warning: context imbalance in 'd_materialise_unique' - different lock contexts for basic block Signed-off-by: Miklos Szeredi Reviewed-by: Matthew Wilcox Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 2b479de10a0a..e4b2b9436b32 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry) /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. - * Called with dcache_lock and per dentry lock held, drops both. */ static void dentry_iput(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry) * d_kill - kill dentry and return parent * @dentry: dentry to kill * - * Called with dcache_lock and d_lock, releases both. The dentry must - * already be unhashed and removed from the LRU. + * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. */ static struct dentry *d_kill(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct dentry *parent; @@ -383,11 +386,11 @@ restart: * Try to prune ancestors as well. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. - * - * Called with dcache_lock, drops it and then regains. - * Called with dentry->d_lock held, drops it. */ static void prune_one_dentry(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) + __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2) * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... - * - * On return, dcache_lock will have been unlocked. */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) + __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -1743,7 +1745,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); - goto shouldnt_be_hashed; } static int prepend(char **buffer, int *buflen, const char *str, @@ -1758,7 +1759,7 @@ static int prepend(char **buffer, int *buflen, const char *str, } /** - * d_path - return the path of a dentry + * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in @@ -1847,7 +1848,7 @@ Elong: * * Returns the buffer or an error code if the path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. */ char *d_path(const struct path *path, char *buf, int buflen) { From cdd16d0265c9234228fd37fbbad844d7e894b278 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:53 +0200 Subject: [PATCH 12/12] [patch 2/3] vfs: dcache cleanups Comment from Al Viro: add prepend_name() wrapper. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/dcache.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index e4b2b9436b32..6068c25b393c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1747,8 +1747,7 @@ shouldnt_be_hashed: BUG(); } -static int prepend(char **buffer, int *buflen, const char *str, - int namelen) +static int prepend(char **buffer, int *buflen, const char *str, int namelen) { *buflen -= namelen; if (*buflen < 0) @@ -1758,6 +1757,11 @@ static int prepend(char **buffer, int *buflen, const char *str, return 0; } +static int prepend_name(char **buffer, int *buflen, struct qstr *name) +{ + return prepend(buffer, buflen, name->name, name->len); +} + /** * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report @@ -1780,8 +1784,8 @@ char *__d_path(const struct path *path, struct path *root, { struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; - char * end = buffer+buflen; - char * retval; + char *end = buffer + buflen; + char *retval; spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); @@ -1811,8 +1815,7 @@ char *__d_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; @@ -1825,8 +1828,7 @@ out: global_root: retval += 1; /* hit the slash */ - if (prepend(&retval, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) + if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) goto Elong; root->mnt = vfsmnt; root->dentry = dentry; @@ -1918,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) retval = end-1; *retval = '/'; - for (;;) { - struct dentry *parent; - if (IS_ROOT(dentry)) - break; + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; - parent = dentry->d_parent; prefetch(parent); - - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; @@ -1978,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) error = -ENOENT; /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { + if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd;