2005-12-16 06:29:43 +08:00
|
|
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
|
|
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
|
|
|
*
|
|
|
|
* dir.c - Operations for configfs directories.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public
|
|
|
|
* License along with this program; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*
|
|
|
|
* Based on sysfs:
|
|
|
|
* sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
|
|
|
|
*
|
|
|
|
* configfs Copyright (C) 2005 Oracle. All rights reserved.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/slab.h>
|
2008-06-17 01:01:00 +08:00
|
|
|
#include <linux/err.h>
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
#include <linux/configfs.h>
|
|
|
|
#include "configfs_internal.h"
|
|
|
|
|
|
|
|
/*
 * Read-write semaphore defined here for use elsewhere; judging by the name it
 * serialises renames, but no user is visible in this part of the file --
 * TODO confirm against the callers.
 */
DECLARE_RWSEM(configfs_rename_sem);
|
2008-06-17 01:00:58 +08:00
|
|
|
/*
|
|
|
|
* Protects mutations of configfs_dirent linkage together with proper i_mutex
|
2008-06-17 01:00:59 +08:00
|
|
|
* Also protects mutations of symlinks linkage to target configfs_dirent
|
2008-06-17 01:00:58 +08:00
|
|
|
* Mutators of configfs_dirent linkage must *both* have the proper inode locked
|
|
|
|
* and configfs_dirent_lock locked, in that order.
|
2008-06-17 01:00:59 +08:00
|
|
|
* This allows one to safely traverse configfs_dirent trees and symlinks without
|
|
|
|
* having to lock inodes.
|
2008-06-17 01:01:01 +08:00
|
|
|
*
|
|
|
|
* Protects setting of CONFIGFS_USET_DROPPING: checking the flag
|
|
|
|
* unlocked is not reliable unless in detach_groups() called from
|
|
|
|
* rmdir()/unregister() and from configfs_attach_group()
|
2008-06-17 01:00:58 +08:00
|
|
|
*/
|
|
|
|
/*
 * Taken in this file around s_sibling list edits, the s_dentry reset in
 * configfs_d_iput() and the CONFIGFS_USET_* flag checks.
 */
DEFINE_SPINLOCK(configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
static void configfs_d_iput(struct dentry * dentry,
|
|
|
|
struct inode * inode)
|
|
|
|
{
|
2011-05-18 19:08:16 +08:00
|
|
|
struct configfs_dirent *sd = dentry->d_fsdata;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
if (sd) {
|
2011-05-18 19:08:16 +08:00
|
|
|
/* Coordinate with configfs_readdir */
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
configfs: fix race between dentry put and lookup
A race window in configfs, it starts from one dentry is UNHASHED and end
before configfs_d_iput is called. In this window, if a lookup happen,
since the original dentry was UNHASHED, so a new dentry will be
allocated, and then in configfs_attach_attr(), sd->s_dentry will be
updated to the new dentry. Then in configfs_d_iput(),
BUG_ON(sd->s_dentry != dentry) will be triggered and system panic.
sys_open: sys_close:
... fput
dput
dentry_kill
__d_drop <--- dentry unhashed here,
but sd->dentry still point
to this dentry.
lookup_real
configfs_lookup
configfs_attach_attr---> update sd->s_dentry
to new allocated dentry here.
d_kill
configfs_d_iput <--- BUG_ON(sd->s_dentry != dentry)
triggered here.
To fix it, change configfs_d_iput to not update sd->s_dentry if
sd->s_count > 2, that means there are another dentry is using the sd
beside the one that is going to be put. Use configfs_dirent_lock in
configfs_attach_attr to sync with configfs_d_iput.
With the following steps, you can reproduce the bug.
1. enable ocfs2, this will mount configfs at /sys/kernel/config and
fill configure in it.
2. run the following script.
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-11-22 06:31:56 +08:00
|
|
|
/* Coordinate with configfs_attach_attr where will increase
|
|
|
|
* sd->s_count and update sd->s_dentry to new allocated one.
|
|
|
|
* Only set sd->dentry to null when this dentry is the only
|
|
|
|
* sd owner.
|
|
|
|
* If not do so, configfs_d_iput may run just after
|
|
|
|
* configfs_attach_attr and set sd->s_dentry to null
|
|
|
|
* even it's still in use.
|
|
|
|
*/
|
|
|
|
if (atomic_read(&sd->s_count) <= 2)
|
|
|
|
sd->s_dentry = NULL;
|
|
|
|
|
2011-05-18 19:08:16 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_put(sd);
|
|
|
|
}
|
|
|
|
iput(inode);
|
|
|
|
}
|
|
|
|
|
2011-01-13 05:41:05 +08:00
|
|
|
const struct dentry_operations configfs_dentry_ops = {
|
2005-12-16 06:29:43 +08:00
|
|
|
.d_iput = configfs_d_iput,
|
2013-10-26 06:47:37 +08:00
|
|
|
.d_delete = always_delete_dentry,
|
2005-12-16 06:29:43 +08:00
|
|
|
};
|
|
|
|
|
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-29 02:18:32 +08:00
|
|
|
#ifdef CONFIG_LOCKDEP

/*
 * Depth bookkeeping that keeps lockdep quiet about the recursive locking of
 * default groups' inodes (see configfs_attach_group() and
 * configfs_detach_group()).
 *
 * Each default group's i_mutex is placed in a lock class selected by how far
 * the group sits below its youngest non-default group ancestor.  Example: for
 * a non-default group A with default groups A/B, A/C and A/C/D, the inode
 * mutexes of A/B and A/C use default_group_class[0] while A/C/D uses
 * default_group_class[1].
 *
 * inode.c declares the classes and assigns them from s_depth.  s_depth starts
 * at -1, becomes >= 0 while default groups are being attached, and returns to
 * -1 once they are all attached.  While attaching, configfs_create() puts the
 * new inode's mutex into default_group_class[s_depth - 1] whenever it sees
 * s_depth > 0.
 */

/* A fresh dirent starts outside of any default-group attachment. */
static void configfs_init_dirent_depth(struct configfs_dirent *sd)
{
	sd->s_depth = -1;
}

/* Place @sd one level below @parent_sd when the parent is being populated. */
static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
					  struct configfs_dirent *sd)
{
	int depth_of_parent = parent_sd->s_depth;

	/* Parent is not attaching default groups: leave @sd untouched. */
	if (depth_of_parent < 0)
		return;

	sd->s_depth = depth_of_parent + 1;
}

static void
configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
{
	/*
	 * The item's own i_mutex class has already been chosen, so from here
	 * on s_depth only seeds the s_depth of new sub-directories, and that
	 * always happens with the item's i_mutex held.
	 *
	 * s_depth == -1 iff this is a non default group; a default group
	 * already carries s_depth > 0 (see create_dir()) and is left alone.
	 */
	if (sd->s_depth != -1)
		return;

	/* Non default group that is about to create default groups. */
	sd->s_depth = 0;
}

static void
configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
{
	/* No more default groups will be created under this dirent. */
	sd->s_depth = -1;
}

#else /* CONFIG_LOCKDEP */

/* Without lockdep there is no depth to track: every helper is a no-op. */

static void configfs_init_dirent_depth(struct configfs_dirent *sd)
{
}

static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
					  struct configfs_dirent *sd)
{
}

static void
configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
{
}

static void
configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
{
}

#endif /* CONFIG_LOCKDEP */
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
/*
|
|
|
|
* Allocates a new configfs_dirent and links it to the parent configfs_dirent
|
|
|
|
*/
|
2009-01-29 02:18:33 +08:00
|
|
|
static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
|
|
|
|
void *element, int type)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
|
|
|
struct configfs_dirent * sd;
|
|
|
|
|
2007-02-10 17:45:03 +08:00
|
|
|
sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!sd)
|
2008-06-17 01:01:00 +08:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
atomic_set(&sd->s_count, 1);
|
|
|
|
INIT_LIST_HEAD(&sd->s_links);
|
|
|
|
INIT_LIST_HEAD(&sd->s_children);
|
|
|
|
sd->s_element = element;
|
2009-01-29 02:18:33 +08:00
|
|
|
sd->s_type = type;
|
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-29 02:18:32 +08:00
|
|
|
configfs_init_dirent_depth(sd);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2008-06-17 01:01:01 +08:00
|
|
|
if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
kmem_cache_free(configfs_dir_cachep, sd);
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
}
|
2008-06-17 01:00:58 +08:00
|
|
|
list_add(&sd->s_sibling, &parent_sd->s_children);
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
return sd;
|
|
|
|
}
|
|
|
|
|
2006-09-14 02:01:19 +08:00
|
|
|
/*
|
|
|
|
*
|
|
|
|
* Return -EEXIST if there is already a configfs element with the same
|
|
|
|
* name for the same parent.
|
|
|
|
*
|
|
|
|
* called with parent inode's i_mutex held
|
|
|
|
*/
|
2006-11-20 10:24:00 +08:00
|
|
|
static int configfs_dirent_exists(struct configfs_dirent *parent_sd,
|
|
|
|
const unsigned char *new)
|
2006-09-14 02:01:19 +08:00
|
|
|
{
|
|
|
|
struct configfs_dirent * sd;
|
|
|
|
|
|
|
|
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
|
|
|
|
if (sd->s_element) {
|
|
|
|
const unsigned char *existing = configfs_get_name(sd);
|
|
|
|
if (strcmp(existing, new))
|
|
|
|
continue;
|
|
|
|
else
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
int configfs_make_dirent(struct configfs_dirent * parent_sd,
|
|
|
|
struct dentry * dentry, void * element,
|
|
|
|
umode_t mode, int type)
|
|
|
|
{
|
|
|
|
struct configfs_dirent * sd;
|
|
|
|
|
2009-01-29 02:18:33 +08:00
|
|
|
sd = configfs_new_dirent(parent_sd, element, type);
|
2008-06-17 01:01:00 +08:00
|
|
|
if (IS_ERR(sd))
|
|
|
|
return PTR_ERR(sd);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
sd->s_mode = mode;
|
|
|
|
sd->s_dentry = dentry;
|
2011-01-07 14:49:21 +08:00
|
|
|
if (dentry)
|
2005-12-16 06:29:43 +08:00
|
|
|
dentry->d_fsdata = configfs_get(sd);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int init_dir(struct inode * inode)
|
|
|
|
{
|
|
|
|
inode->i_op = &configfs_dir_inode_operations;
|
|
|
|
inode->i_fop = &configfs_dir_operations;
|
|
|
|
|
|
|
|
/* directory inodes start off with i_nlink == 2 (for "." entry) */
|
2006-10-01 14:29:04 +08:00
|
|
|
inc_nlink(inode);
|
2005-12-16 06:29:43 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-17 14:31:13 +08:00
|
|
|
static int configfs_init_file(struct inode * inode)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
|
|
|
inode->i_size = PAGE_SIZE;
|
|
|
|
inode->i_fop = &configfs_file_operations;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int init_symlink(struct inode * inode)
|
|
|
|
{
|
|
|
|
inode->i_op = &configfs_symlink_inode_operations;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-03-18 04:49:20 +08:00
|
|
|
static int create_dir(struct config_item *k, struct dentry *d)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
|
2012-03-18 04:49:20 +08:00
|
|
|
struct dentry *p = d->d_parent;
|
|
|
|
|
|
|
|
BUG_ON(!k);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2006-09-14 02:01:19 +08:00
|
|
|
error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
|
|
|
|
if (!error)
|
|
|
|
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
CONFIGFS_DIR | CONFIGFS_USET_CREATING);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!error) {
|
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-29 02:18:32 +08:00
|
|
|
configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
|
2006-01-26 05:31:07 +08:00
|
|
|
error = configfs_create(d, mode, init_dir);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!error) {
|
2006-10-01 14:29:04 +08:00
|
|
|
inc_nlink(p->d_inode);
|
2006-01-26 05:31:07 +08:00
|
|
|
} else {
|
|
|
|
struct configfs_dirent *sd = d->d_fsdata;
|
|
|
|
if (sd) {
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2006-01-26 05:31:07 +08:00
|
|
|
list_del_init(&sd->s_sibling);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2006-01-26 05:31:07 +08:00
|
|
|
configfs_put(sd);
|
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* configfs_create_dir - create a directory for a config_item.
|
|
|
|
* @item: config_item we're creating directory for.
|
|
|
|
* @dentry: config_item's dentry.
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
*
|
|
|
|
* Note: user-created entries won't be allowed under this new directory
|
|
|
|
* until it is validated by configfs_dir_set_ready()
|
2005-12-16 06:29:43 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
|
|
|
|
{
|
2012-03-18 04:49:20 +08:00
|
|
|
int error = create_dir(item, dentry);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!error)
|
|
|
|
item->ci_dentry = dentry;
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
/*
|
|
|
|
* Allow userspace to create new entries under a new directory created with
|
|
|
|
* configfs_create_dir(), and under all of its chidlren directories recursively.
|
|
|
|
* @sd configfs_dirent of the new directory to validate
|
|
|
|
*
|
|
|
|
* Caller must hold configfs_dirent_lock.
|
|
|
|
*/
|
|
|
|
static void configfs_dir_set_ready(struct configfs_dirent *sd)
|
|
|
|
{
|
|
|
|
struct configfs_dirent *child_sd;
|
|
|
|
|
|
|
|
sd->s_type &= ~CONFIGFS_USET_CREATING;
|
|
|
|
list_for_each_entry(child_sd, &sd->s_children, s_sibling)
|
|
|
|
if (child_sd->s_type & CONFIGFS_USET_CREATING)
|
|
|
|
configfs_dir_set_ready(child_sd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that a directory does not belong to a directory hierarchy being
|
|
|
|
* attached and not validated yet.
|
|
|
|
* @sd configfs_dirent of the directory to check
|
|
|
|
*
|
|
|
|
* @return non-zero iff the directory was validated
|
|
|
|
*
|
|
|
|
* Note: takes configfs_dirent_lock, so the result may change from false to true
|
|
|
|
* in two consecutive calls, but never from true to false.
|
|
|
|
*/
|
|
|
|
int configfs_dirent_is_ready(struct configfs_dirent *sd)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
ret = !(sd->s_type & CONFIGFS_USET_CREATING);
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
int configfs_create_link(struct configfs_symlink *sl,
|
|
|
|
struct dentry *parent,
|
|
|
|
struct dentry *dentry)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
umode_t mode = S_IFLNK | S_IRWXUGO;
|
|
|
|
|
2006-01-26 05:31:07 +08:00
|
|
|
err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode,
|
|
|
|
CONFIGFS_ITEM_LINK);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!err) {
|
2006-01-26 05:31:07 +08:00
|
|
|
err = configfs_create(dentry, mode, init_symlink);
|
2011-01-07 14:49:21 +08:00
|
|
|
if (err) {
|
2006-01-26 05:31:07 +08:00
|
|
|
struct configfs_dirent *sd = dentry->d_fsdata;
|
|
|
|
if (sd) {
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2006-01-26 05:31:07 +08:00
|
|
|
list_del_init(&sd->s_sibling);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2006-01-26 05:31:07 +08:00
|
|
|
configfs_put(sd);
|
|
|
|
}
|
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void remove_dir(struct dentry * d)
|
|
|
|
{
|
|
|
|
struct dentry * parent = dget(d->d_parent);
|
|
|
|
struct configfs_dirent * sd;
|
|
|
|
|
|
|
|
sd = d->d_fsdata;
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2006-03-11 03:42:30 +08:00
|
|
|
list_del_init(&sd->s_sibling);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_put(sd);
|
|
|
|
if (d->d_inode)
|
|
|
|
simple_rmdir(parent->d_inode,d);
|
|
|
|
|
2013-07-10 02:26:44 +08:00
|
|
|
pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d));
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
dput(parent);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* configfs_remove_dir - remove an config_item's directory.
|
|
|
|
* @item: config_item we're removing.
|
|
|
|
*
|
|
|
|
* The only thing special about this is that we remove any files in
|
|
|
|
* the directory before we remove the directory, and we've inlined
|
|
|
|
* what used to be configfs_rmdir() below, instead of calling separately.
|
2008-07-04 22:56:06 +08:00
|
|
|
*
|
|
|
|
* Caller holds the mutex of the item's inode
|
2005-12-16 06:29:43 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void configfs_remove_dir(struct config_item * item)
|
|
|
|
{
|
|
|
|
struct dentry * dentry = dget(item->ci_dentry);
|
|
|
|
|
|
|
|
if (!dentry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
remove_dir(dentry);
|
|
|
|
/**
|
|
|
|
* Drop reference from dget() on entrance.
|
|
|
|
*/
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* attaches attribute's configfs_dirent to the dentry corresponding to the
|
|
|
|
* attribute file
|
|
|
|
*/
|
|
|
|
static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
|
|
|
|
{
|
|
|
|
struct configfs_attribute * attr = sd->s_element;
|
|
|
|
int error;
|
|
|
|
|
configfs: fix race between dentry put and lookup
A race window in configfs, it starts from one dentry is UNHASHED and end
before configfs_d_iput is called. In this window, if a lookup happen,
since the original dentry was UNHASHED, so a new dentry will be
allocated, and then in configfs_attach_attr(), sd->s_dentry will be
updated to the new dentry. Then in configfs_d_iput(),
BUG_ON(sd->s_dentry != dentry) will be triggered and system panic.
sys_open: sys_close:
... fput
dput
dentry_kill
__d_drop <--- dentry unhashed here,
but sd->dentry still point
to this dentry.
lookup_real
configfs_lookup
configfs_attach_attr---> update sd->s_dentry
to new allocated dentry here.
d_kill
configfs_d_iput <--- BUG_ON(sd->s_dentry != dentry)
triggered here.
To fix it, change configfs_d_iput to not update sd->s_dentry if
sd->s_count > 2, that means there are another dentry is using the sd
beside the one that is going to be put. Use configfs_dirent_lock in
configfs_attach_attr to sync with configfs_d_iput.
With the following steps, you can reproduce the bug.
1. enable ocfs2, this will mount configfs at /sys/kernel/config and
fill configure in it.
2. run the following script.
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-11-22 06:31:56 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2006-01-26 05:31:07 +08:00
|
|
|
dentry->d_fsdata = configfs_get(sd);
|
|
|
|
sd->s_dentry = dentry;
|
configfs: fix race between dentry put and lookup
A race window in configfs, it starts from one dentry is UNHASHED and end
before configfs_d_iput is called. In this window, if a lookup happen,
since the original dentry was UNHASHED, so a new dentry will be
allocated, and then in configfs_attach_attr(), sd->s_dentry will be
updated to the new dentry. Then in configfs_d_iput(),
BUG_ON(sd->s_dentry != dentry) will be triggered and system panic.
sys_open: sys_close:
... fput
dput
dentry_kill
__d_drop <--- dentry unhashed here,
but sd->dentry still point
to this dentry.
lookup_real
configfs_lookup
configfs_attach_attr---> update sd->s_dentry
to new allocated dentry here.
d_kill
configfs_d_iput <--- BUG_ON(sd->s_dentry != dentry)
triggered here.
To fix it, change configfs_d_iput to not update sd->s_dentry if
sd->s_count > 2, that means there are another dentry is using the sd
beside the one that is going to be put. Use configfs_dirent_lock in
configfs_attach_attr to sync with configfs_d_iput.
With the following steps, you can reproduce the bug.
1. enable ocfs2, this will mount configfs at /sys/kernel/config and
fill configure in it.
2. run the following script.
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
while [ 1 ]; do cat /sys/kernel/config/cluster/$your_cluster_name/idle_timeout_ms > /dev/null; done &
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-11-22 06:31:56 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
|
2007-10-17 14:31:13 +08:00
|
|
|
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
|
|
|
|
configfs_init_file);
|
2006-01-26 05:31:07 +08:00
|
|
|
if (error) {
|
|
|
|
configfs_put(sd);
|
2005-12-16 06:29:43 +08:00
|
|
|
return error;
|
2006-01-26 05:31:07 +08:00
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
d_rehash(dentry);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dentry * configfs_lookup(struct inode *dir,
|
|
|
|
struct dentry *dentry,
|
2012-06-11 05:13:09 +08:00
|
|
|
unsigned int flags)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
|
|
|
struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
|
|
|
|
struct configfs_dirent * sd;
|
|
|
|
int found = 0;
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fake invisibility if dir belongs to a group/default groups hierarchy
|
|
|
|
* being attached
|
|
|
|
*
|
|
|
|
* This forbids userspace to read/write attributes of items which may
|
|
|
|
* not complete their initialization, since the dentries of the
|
|
|
|
* attributes won't be instantiated.
|
|
|
|
*/
|
|
|
|
err = -ENOENT;
|
|
|
|
if (!configfs_dirent_is_ready(parent_sd))
|
|
|
|
goto out;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
|
|
|
|
if (sd->s_type & CONFIGFS_NOT_PINNED) {
|
|
|
|
const unsigned char * name = configfs_get_name(sd);
|
|
|
|
|
|
|
|
if (strcmp(name, dentry->d_name.name))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
found = 1;
|
|
|
|
err = configfs_attach_attr(sd, dentry);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
/*
|
|
|
|
* If it doesn't exist and it isn't a NOT_PINNED item,
|
|
|
|
* it must be negative.
|
|
|
|
*/
|
2011-01-07 14:49:21 +08:00
|
|
|
if (dentry->d_name.len > NAME_MAX)
|
|
|
|
return ERR_PTR(-ENAMETOOLONG);
|
|
|
|
d_add(dentry, NULL);
|
|
|
|
return NULL;
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
out:
|
2005-12-16 06:29:43 +08:00
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
|
2008-06-17 01:01:01 +08:00
|
|
|
* attributes and are removed by rmdir(). We recurse, setting
|
|
|
|
* CONFIGFS_USET_DROPPING on all children that are candidates for
|
|
|
|
* default detach.
|
|
|
|
* If there is an error, the caller will reset the flags via
|
|
|
|
* configfs_detach_rollback().
|
2005-12-16 06:29:43 +08:00
|
|
|
*/
|
2008-06-17 01:01:02 +08:00
|
|
|
static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
|
|
|
struct configfs_dirent *parent_sd = dentry->d_fsdata;
|
|
|
|
struct configfs_dirent *sd;
|
|
|
|
int ret;
|
|
|
|
|
2008-06-23 20:16:17 +08:00
|
|
|
/* Mark that we're trying to drop the group */
|
|
|
|
parent_sd->s_type |= CONFIGFS_USET_DROPPING;
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
ret = -EBUSY;
|
|
|
|
if (!list_empty(&parent_sd->s_links))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
|
2008-06-27 19:10:25 +08:00
|
|
|
if (!sd->s_element ||
|
|
|
|
(sd->s_type & CONFIGFS_NOT_PINNED))
|
2005-12-16 06:29:43 +08:00
|
|
|
continue;
|
|
|
|
if (sd->s_type & CONFIGFS_USET_DEFAULT) {
|
2008-06-17 01:01:02 +08:00
|
|
|
/* Abort if racing with mkdir() */
|
|
|
|
if (sd->s_type & CONFIGFS_USET_IN_MKDIR) {
|
|
|
|
if (wait_mutex)
|
|
|
|
*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
/*
|
|
|
|
* Yup, recursive. If there's a problem, blame
|
|
|
|
* deep nesting of default_groups
|
|
|
|
*/
|
2008-06-17 01:01:02 +08:00
|
|
|
ret = configfs_detach_prep(sd->s_dentry, wait_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (!ret)
|
2006-03-11 03:42:30 +08:00
|
|
|
continue;
|
2005-12-16 06:29:43 +08:00
|
|
|
} else
|
|
|
|
ret = -ENOTEMPTY;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2008-06-17 01:01:01 +08:00
|
|
|
* Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was
|
2005-12-16 06:29:43 +08:00
|
|
|
* set.
|
|
|
|
*/
|
|
|
|
static void configfs_detach_rollback(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct configfs_dirent *parent_sd = dentry->d_fsdata;
|
|
|
|
struct configfs_dirent *sd;
|
|
|
|
|
2008-06-23 20:16:17 +08:00
|
|
|
parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
|
|
|
|
|
|
|
|
list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
|
|
|
|
if (sd->s_type & CONFIGFS_USET_DEFAULT)
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_detach_rollback(sd->s_dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void detach_attrs(struct config_item * item)
|
|
|
|
{
|
|
|
|
struct dentry * dentry = dget(item->ci_dentry);
|
|
|
|
struct configfs_dirent * parent_sd;
|
|
|
|
struct configfs_dirent * sd, * tmp;
|
|
|
|
|
|
|
|
if (!dentry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
pr_debug("configfs %s: dropping attrs for dir\n",
|
|
|
|
dentry->d_name.name);
|
|
|
|
|
|
|
|
parent_sd = dentry->d_fsdata;
|
|
|
|
list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
|
|
|
|
if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED))
|
|
|
|
continue;
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
list_del_init(&sd->s_sibling);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_drop_dentry(sd, dentry);
|
|
|
|
configfs_put(sd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Drop reference from dget() on entrance.
|
|
|
|
*/
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int populate_attrs(struct config_item *item)
|
|
|
|
{
|
|
|
|
struct config_item_type *t = item->ci_type;
|
|
|
|
struct configfs_attribute *attr;
|
|
|
|
int error = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!t)
|
|
|
|
return -EINVAL;
|
|
|
|
if (t->ct_attrs) {
|
|
|
|
for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) {
|
|
|
|
if ((error = configfs_create_file(item, attr)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
detach_attrs(item);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int configfs_attach_group(struct config_item *parent_item,
|
|
|
|
struct config_item *item,
|
|
|
|
struct dentry *dentry);
|
|
|
|
static void configfs_detach_group(struct config_item *item);
|
|
|
|
|
|
|
|
static void detach_groups(struct config_group *group)
|
|
|
|
{
|
|
|
|
struct dentry * dentry = dget(group->cg_item.ci_dentry);
|
|
|
|
struct dentry *child;
|
|
|
|
struct configfs_dirent *parent_sd;
|
|
|
|
struct configfs_dirent *sd, *tmp;
|
|
|
|
|
|
|
|
if (!dentry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
parent_sd = dentry->d_fsdata;
|
|
|
|
list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
|
|
|
|
if (!sd->s_element ||
|
|
|
|
!(sd->s_type & CONFIGFS_USET_DEFAULT))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
child = sd->s_dentry;
|
|
|
|
|
2008-06-17 01:01:01 +08:00
|
|
|
mutex_lock(&child->d_inode->i_mutex);
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_detach_group(sd->s_element);
|
|
|
|
child->d_inode->i_flags |= S_DEAD;
|
2010-05-01 05:17:09 +08:00
|
|
|
dont_mount(child);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2008-06-17 01:01:01 +08:00
|
|
|
mutex_unlock(&child->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
d_delete(child);
|
|
|
|
dput(child);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Drop reference from dget() on entrance.
|
|
|
|
*/
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This fakes mkdir(2) on a default_groups[] entry. It
|
|
|
|
* creates a dentry, attachs it, and then does fixup
|
|
|
|
* on the sd->s_type.
|
|
|
|
*
|
|
|
|
* We could, perhaps, tweak our parent's ->mkdir for a minute and
|
|
|
|
* try using vfs_mkdir. Just a thought.
|
|
|
|
*/
|
|
|
|
static int create_default_group(struct config_group *parent_group,
|
|
|
|
struct config_group *group)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct configfs_dirent *sd;
|
|
|
|
/* We trust the caller holds a reference to parent */
|
|
|
|
struct dentry *child, *parent = parent_group->cg_item.ci_dentry;
|
|
|
|
|
|
|
|
if (!group->cg_item.ci_name)
|
|
|
|
group->cg_item.ci_name = group->cg_item.ci_namebuf;
|
|
|
|
|
|
|
|
ret = -ENOMEM;
|
2013-07-14 21:16:52 +08:00
|
|
|
child = d_alloc_name(parent, group->cg_item.ci_name);
|
2005-12-16 06:29:43 +08:00
|
|
|
if (child) {
|
|
|
|
d_add(child, NULL);
|
|
|
|
|
|
|
|
ret = configfs_attach_group(&parent_group->cg_item,
|
|
|
|
&group->cg_item, child);
|
|
|
|
if (!ret) {
|
|
|
|
sd = child->d_fsdata;
|
|
|
|
sd->s_type |= CONFIGFS_USET_DEFAULT;
|
|
|
|
} else {
|
2011-02-22 17:09:49 +08:00
|
|
|
BUG_ON(child->d_inode);
|
|
|
|
d_drop(child);
|
2005-12-16 06:29:43 +08:00
|
|
|
dput(child);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int populate_groups(struct config_group *group)
|
|
|
|
{
|
|
|
|
struct config_group *new_group;
|
|
|
|
int ret = 0;
|
|
|
|
int i;
|
|
|
|
|
2006-03-23 07:36:54 +08:00
|
|
|
if (group->default_groups) {
|
2005-12-16 06:29:43 +08:00
|
|
|
for (i = 0; group->default_groups[i]; i++) {
|
|
|
|
new_group = group->default_groups[i];
|
|
|
|
|
|
|
|
ret = create_default_group(group, new_group);
|
2008-07-04 22:56:06 +08:00
|
|
|
if (ret) {
|
|
|
|
detach_groups(group);
|
2005-12-16 06:29:43 +08:00
|
|
|
break;
|
2008-07-04 22:56:06 +08:00
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All of link_obj/unlink_obj/link_group/unlink_group require that
|
2007-07-07 14:33:17 +08:00
|
|
|
* subsys->su_mutex is held.
|
2005-12-16 06:29:43 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void unlink_obj(struct config_item *item)
|
|
|
|
{
|
|
|
|
struct config_group *group;
|
|
|
|
|
|
|
|
group = item->ci_group;
|
|
|
|
if (group) {
|
|
|
|
list_del_init(&item->ci_entry);
|
|
|
|
|
|
|
|
item->ci_group = NULL;
|
|
|
|
item->ci_parent = NULL;
|
2006-04-12 12:37:20 +08:00
|
|
|
|
|
|
|
/* Drop the reference for ci_entry */
|
2005-12-16 06:29:43 +08:00
|
|
|
config_item_put(item);
|
|
|
|
|
2006-04-12 12:37:20 +08:00
|
|
|
/* Drop the reference for ci_parent */
|
2005-12-16 06:29:43 +08:00
|
|
|
config_group_put(group);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void link_obj(struct config_item *parent_item, struct config_item *item)
|
|
|
|
{
|
2006-04-12 12:37:20 +08:00
|
|
|
/*
|
|
|
|
* Parent seems redundant with group, but it makes certain
|
|
|
|
* traversals much nicer.
|
|
|
|
*/
|
2005-12-16 06:29:43 +08:00
|
|
|
item->ci_parent = parent_item;
|
2006-04-12 12:37:20 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We hold a reference on the parent for the child's ci_parent
|
|
|
|
* link.
|
|
|
|
*/
|
2005-12-16 06:29:43 +08:00
|
|
|
item->ci_group = config_group_get(to_config_group(parent_item));
|
|
|
|
list_add_tail(&item->ci_entry, &item->ci_group->cg_children);
|
|
|
|
|
2006-04-12 12:37:20 +08:00
|
|
|
/*
|
|
|
|
* We hold a reference on the child for ci_entry on the parent's
|
|
|
|
* cg_children
|
|
|
|
*/
|
2005-12-16 06:29:43 +08:00
|
|
|
config_item_get(item);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void unlink_group(struct config_group *group)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct config_group *new_group;
|
|
|
|
|
|
|
|
if (group->default_groups) {
|
|
|
|
for (i = 0; group->default_groups[i]; i++) {
|
|
|
|
new_group = group->default_groups[i];
|
|
|
|
unlink_group(new_group);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
group->cg_subsys = NULL;
|
|
|
|
unlink_obj(&group->cg_item);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void link_group(struct config_group *parent_group, struct config_group *group)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct config_group *new_group;
|
|
|
|
struct configfs_subsystem *subsys = NULL; /* gcc is a turd */
|
|
|
|
|
|
|
|
link_obj(&parent_group->cg_item, &group->cg_item);
|
|
|
|
|
|
|
|
if (parent_group->cg_subsys)
|
|
|
|
subsys = parent_group->cg_subsys;
|
|
|
|
else if (configfs_is_root(&parent_group->cg_item))
|
|
|
|
subsys = to_configfs_subsystem(group);
|
|
|
|
else
|
|
|
|
BUG();
|
|
|
|
group->cg_subsys = subsys;
|
|
|
|
|
|
|
|
if (group->default_groups) {
|
|
|
|
for (i = 0; group->default_groups[i]; i++) {
|
|
|
|
new_group = group->default_groups[i];
|
|
|
|
link_group(group, new_group);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The goal is that configfs_attach_item() (and
|
|
|
|
* configfs_attach_group()) can be called from either the VFS or this
|
|
|
|
* module. That is, they assume that the items have been created,
|
|
|
|
* the dentry allocated, and the dcache is all ready to go.
|
|
|
|
*
|
|
|
|
* If they fail, they must clean up after themselves as if they
|
|
|
|
* had never been called. The caller (VFS or local function) will
|
|
|
|
* handle cleaning up the dcache bits.
|
|
|
|
*
|
|
|
|
* configfs_detach_group() and configfs_detach_item() behave similarly on
|
|
|
|
* the way out. They assume that the proper semaphores are held, they
|
|
|
|
* clean up the configfs items, and they expect their callers will
|
|
|
|
* handle the dcache bits.
|
|
|
|
*/
|
|
|
|
static int configfs_attach_item(struct config_item *parent_item,
|
|
|
|
struct config_item *item,
|
|
|
|
struct dentry *dentry)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = configfs_create_dir(item, dentry);
|
|
|
|
if (!ret) {
|
|
|
|
ret = populate_attrs(item);
|
|
|
|
if (ret) {
|
2008-07-04 22:56:06 +08:00
|
|
|
/*
|
|
|
|
* We are going to remove an inode and its dentry but
|
|
|
|
* the VFS may already have hit and used them. Thus,
|
|
|
|
* we must lock them as rmdir() would.
|
|
|
|
*/
|
|
|
|
mutex_lock(&dentry->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_remove_dir(item);
|
2008-07-04 22:56:06 +08:00
|
|
|
dentry->d_inode->i_flags |= S_DEAD;
|
2010-05-01 05:17:09 +08:00
|
|
|
dont_mount(dentry);
|
2008-07-04 22:56:06 +08:00
|
|
|
mutex_unlock(&dentry->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
d_delete(dentry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2008-07-04 22:56:06 +08:00
|
|
|
/*
 * Tear down @item's filesystem presence: its attribute files first, then
 * the directory itself.
 *
 * Caller holds the mutex of the item's inode
 */
static void configfs_detach_item(struct config_item *item)
{
	/* Attributes must go before the directory that contains them */
	detach_attrs(item);
	configfs_remove_dir(item);
}
|
|
|
|
|
|
|
|
static int configfs_attach_group(struct config_item *parent_item,
|
|
|
|
struct config_item *item,
|
|
|
|
struct dentry *dentry)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct configfs_dirent *sd;
|
|
|
|
|
|
|
|
ret = configfs_attach_item(parent_item, item, dentry);
|
|
|
|
if (!ret) {
|
|
|
|
sd = dentry->d_fsdata;
|
|
|
|
sd->s_type |= CONFIGFS_USET_DIR;
|
|
|
|
|
2008-07-04 22:56:06 +08:00
|
|
|
/*
|
|
|
|
* FYI, we're faking mkdir in populate_groups()
|
|
|
|
* We must lock the group's inode to avoid races with the VFS
|
|
|
|
* which can already hit the inode and try to add/remove entries
|
|
|
|
* under it.
|
|
|
|
*
|
|
|
|
* We must also lock the inode to remove it safely in case of
|
|
|
|
* error, as rmdir() would.
|
|
|
|
*/
|
|
|
|
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
|
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-29 02:18:32 +08:00
|
|
|
configfs_adjust_dir_dirent_depth_before_populate(sd);
|
2005-12-16 06:29:43 +08:00
|
|
|
ret = populate_groups(to_config_group(item));
|
|
|
|
if (ret) {
|
|
|
|
configfs_detach_item(item);
|
2008-07-04 22:56:06 +08:00
|
|
|
dentry->d_inode->i_flags |= S_DEAD;
|
2010-05-01 05:17:09 +08:00
|
|
|
dont_mount(dentry);
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-29 02:18:32 +08:00
|
|
|
configfs_adjust_dir_dirent_depth_after_populate(sd);
|
2008-07-04 22:56:06 +08:00
|
|
|
mutex_unlock(&dentry->d_inode->i_mutex);
|
|
|
|
if (ret)
|
|
|
|
d_delete(dentry);
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2008-07-04 22:56:06 +08:00
|
|
|
/*
 * Tear down a group: detach its default groups first, then the group's own
 * attributes and directory.
 *
 * Caller holds the mutex of the group's inode
 */
static void configfs_detach_group(struct config_item *item)
{
	/* Children go before the directory that contains them */
	detach_groups(to_config_group(item));
	configfs_detach_item(item);
}
|
|
|
|
|
2006-10-07 08:33:23 +08:00
|
|
|
/*
|
|
|
|
* After the item has been detached from the filesystem view, we are
|
|
|
|
* ready to tear it out of the hierarchy. Notify the client before
|
|
|
|
* we do that so they can perform any cleanup that requires
|
|
|
|
* navigating the hierarchy. A client does not need to provide this
|
|
|
|
* callback. The subsystem semaphore MUST be held by the caller, and
|
|
|
|
* references must be valid for both items. It also assumes the
|
|
|
|
* caller has validated ci_type.
|
|
|
|
*/
|
|
|
|
static void client_disconnect_notify(struct config_item *parent_item,
|
|
|
|
struct config_item *item)
|
|
|
|
{
|
|
|
|
struct config_item_type *type;
|
|
|
|
|
|
|
|
type = parent_item->ci_type;
|
|
|
|
BUG_ON(!type);
|
|
|
|
|
|
|
|
if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
|
|
|
|
type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
|
|
|
|
item);
|
|
|
|
}
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
/*
|
|
|
|
* Drop the initial reference from make_item()/make_group()
|
|
|
|
* This function assumes that reference is held on item
|
|
|
|
* and that item holds a valid reference to the parent. Also, it
|
|
|
|
* assumes the caller has validated ci_type.
|
|
|
|
*/
|
|
|
|
static void client_drop_item(struct config_item *parent_item,
|
|
|
|
struct config_item *item)
|
|
|
|
{
|
|
|
|
struct config_item_type *type;
|
|
|
|
|
|
|
|
type = parent_item->ci_type;
|
|
|
|
BUG_ON(!type);
|
|
|
|
|
2006-04-12 12:37:20 +08:00
|
|
|
/*
|
|
|
|
* If ->drop_item() exists, it is responsible for the
|
|
|
|
* config_item_put().
|
|
|
|
*/
|
2005-12-16 06:29:43 +08:00
|
|
|
if (type->ct_group_ops && type->ct_group_ops->drop_item)
|
|
|
|
type->ct_group_ops->drop_item(to_config_group(parent_item),
|
2006-10-07 08:33:23 +08:00
|
|
|
item);
|
2005-12-16 06:29:43 +08:00
|
|
|
else
|
|
|
|
config_item_put(item);
|
|
|
|
}
|
|
|
|
|
configfs: config item dependencies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
#ifdef DEBUG
|
|
|
|
static void configfs_dump_one(struct configfs_dirent *sd, int level)
|
|
|
|
{
|
|
|
|
printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
|
|
|
|
|
|
|
|
#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
|
|
|
|
type_print(CONFIGFS_ROOT);
|
|
|
|
type_print(CONFIGFS_DIR);
|
|
|
|
type_print(CONFIGFS_ITEM_ATTR);
|
|
|
|
type_print(CONFIGFS_ITEM_LINK);
|
|
|
|
type_print(CONFIGFS_USET_DIR);
|
|
|
|
type_print(CONFIGFS_USET_DEFAULT);
|
|
|
|
type_print(CONFIGFS_USET_DROPPING);
|
|
|
|
#undef type_print
|
|
|
|
}
|
|
|
|
|
|
|
|
static int configfs_dump(struct configfs_dirent *sd, int level)
|
|
|
|
{
|
|
|
|
struct configfs_dirent *child_sd;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
configfs_dump_one(sd, level);
|
|
|
|
|
|
|
|
if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
|
|
|
|
ret = configfs_dump(child_sd, level + 2);
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* configfs_depend_item() and configfs_undepend_item()
|
|
|
|
*
|
|
|
|
* WARNING: Do not call these from a configfs callback!
|
|
|
|
*
|
|
|
|
* This describes these functions and their helpers.
|
|
|
|
*
|
|
|
|
* Allow another kernel system to depend on a config_item. If this
|
2011-03-31 09:57:33 +08:00
|
|
|
* happens, the item cannot go away until the dependent can live without
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
* it. The idea is to give client modules as simple an interface as
|
|
|
|
* possible. When a system asks them to depend on an item, they just
|
|
|
|
* call configfs_depend_item(). If the item is live and the client
|
|
|
|
* driver is in good shape, we'll happily do the work for them.
|
|
|
|
*
|
|
|
|
* Why is the locking complex? Because configfs uses the VFS to handle
|
|
|
|
* all locking, but this function is called outside the normal
|
|
|
|
* VFS->configfs path. So it must take VFS locks to prevent the
|
|
|
|
* VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is
|
|
|
|
* why you can't call these functions underneath configfs callbacks.
|
|
|
|
*
|
|
|
|
* Note, btw, that this can be called at *any* time, even when a configfs
|
|
|
|
* subsystem isn't registered, or when configfs is loading or unloading.
|
|
|
|
* Just like configfs_register_subsystem(). So we take the same
|
2009-01-29 02:18:33 +08:00
|
|
|
* precautions. We pin the filesystem. We lock configfs_dirent_lock.
|
|
|
|
* If we can find the target item in the
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
* configfs tree, it must be part of the subsystem tree as well, so we
|
2009-01-29 02:18:33 +08:00
|
|
|
* do not need the subsystem semaphore. Holding configfs_dirent_lock helps
|
|
|
|
* lock out mkdir() and rmdir(), which might be racing us.
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* configfs_depend_prep()
|
|
|
|
*
|
|
|
|
* Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
|
|
|
|
* attributes. This is similar to, but not the same as, configfs_detach_prep().
|
|
|
|
* Note that configfs_detach_prep() expects the parent to be locked when it
|
|
|
|
* is called; configfs_depend_prep() takes no lock itself. Its caller,
|
|
|
|
* configfs_depend_item(), holds configfs_dirent_lock around the search.
|
|
|
|
*
|
|
|
|
* Here we do a depth-first search of the dentry hierarchy looking for
|
2009-01-29 02:18:33 +08:00
|
|
|
* our object.
|
|
|
|
* We deliberately ignore items tagged as dropping since they are virtually
|
|
|
|
* dead, as well as items in the middle of attachment since they virtually
|
|
|
|
* do not exist yet. This completes the locking out of racing mkdir() and
|
|
|
|
* rmdir().
|
|
|
|
* Note: subdirectories in the middle of attachment start with s_type =
|
|
|
|
* CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When
|
|
|
|
* CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of
|
|
|
|
* s_type is in configfs_new_dirent(), which has configfs_dirent_lock.
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
*
|
2009-01-29 02:18:33 +08:00
|
|
|
* If the target is not found, -ENOENT is bubbled up.
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
*
|
|
|
|
* This adds a requirement that all config_items be unique!
|
|
|
|
*
|
2009-01-29 02:18:33 +08:00
|
|
|
* This is recursive. There isn't
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
* much on the stack, though, so folks that need this function - be careful
|
|
|
|
* about your stack! Patches will be accepted to make it iterative.
|
|
|
|
*/
|
|
|
|
static int configfs_depend_prep(struct dentry *origin,
|
|
|
|
struct config_item *target)
|
|
|
|
{
|
2013-02-22 08:42:43 +08:00
|
|
|
struct configfs_dirent *child_sd, *sd;
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
int ret = 0;
|
|
|
|
|
2013-02-22 08:42:43 +08:00
|
|
|
BUG_ON(!origin || !origin->d_fsdata);
|
|
|
|
sd = origin->d_fsdata;
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
if (sd->s_element == target) /* Boo-yah */
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
|
2009-01-29 02:18:33 +08:00
|
|
|
if ((child_sd->s_type & CONFIGFS_DIR) &&
|
|
|
|
!(child_sd->s_type & CONFIGFS_USET_DROPPING) &&
|
|
|
|
!(child_sd->s_type & CONFIGFS_USET_CREATING)) {
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
ret = configfs_depend_prep(child_sd->s_dentry,
|
|
|
|
target);
|
|
|
|
if (!ret)
|
|
|
|
goto out; /* Child path boo-yah */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We looped all our children and didn't find target */
|
|
|
|
ret = -ENOENT;
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int configfs_depend_item(struct configfs_subsystem *subsys,
|
|
|
|
struct config_item *target)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
|
|
|
|
struct config_item *s_item = &subsys->su_group.cg_item;
|
2012-03-18 04:24:54 +08:00
|
|
|
struct dentry *root;
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Pin the configfs filesystem. This means we can safely access
|
|
|
|
* the root of the configfs filesystem.
|
|
|
|
*/
|
2012-03-18 04:53:29 +08:00
|
|
|
root = configfs_pin_fs();
|
|
|
|
if (IS_ERR(root))
|
|
|
|
return PTR_ERR(root);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Next, lock the root directory. We're going to check that the
|
|
|
|
* subsystem is really registered, and so we need to lock out
|
|
|
|
* configfs_[un]register_subsystem().
|
|
|
|
*/
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_lock(&root->d_inode->i_mutex);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
root_sd = root->d_fsdata;
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
list_for_each_entry(p, &root_sd->s_children, s_sibling) {
|
|
|
|
if (p->s_type & CONFIGFS_DIR) {
|
|
|
|
if (p->s_element == s_item) {
|
|
|
|
subsys_sd = p;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!subsys_sd) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto out_unlock_fs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ok, now we can trust subsys/s_item */
|
|
|
|
|
2009-01-29 02:18:33 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
/* Scan the tree, return 0 if found */
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
ret = configfs_depend_prep(subsys_sd->s_dentry, target);
|
|
|
|
if (ret)
|
2009-01-29 02:18:33 +08:00
|
|
|
goto out_unlock_dirent_lock;
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These APIs cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
2009-01-29 02:18:33 +08:00
|
|
|
/*
|
|
|
|
* We are sure that the item is not about to be removed by rmdir(), and
|
|
|
|
* not in the middle of attachment by mkdir().
|
|
|
|
*/
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These APIs cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
p = target->ci_dentry->d_fsdata;
|
|
|
|
p->s_dependent_count += 1;
|
|
|
|
|
2009-01-29 02:18:33 +08:00
|
|
|
out_unlock_dirent_lock:
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These APIs cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
out_unlock_fs:
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_unlock(&root->d_inode->i_mutex);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These APIs cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we succeeded, the fs is pinned via other methods. If not,
|
|
|
|
* we're done with it anyway. So release_fs() is always right.
|
|
|
|
*/
|
|
|
|
configfs_release_fs();
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(configfs_depend_item);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Release the dependent linkage. This is much simpler than
|
|
|
|
* configfs_depend_item() because we know that that the client driver is
|
|
|
|
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
|
|
|
|
*/
|
|
|
|
void configfs_undepend_item(struct configfs_subsystem *subsys,
|
|
|
|
struct config_item *target)
|
|
|
|
{
|
|
|
|
struct configfs_dirent *sd;
|
|
|
|
|
|
|
|
/*
|
2009-01-29 02:18:33 +08:00
|
|
|
* Since we can trust everything is pinned, we just need
|
|
|
|
* configfs_dirent_lock.
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
*/
|
2009-01-29 02:18:33 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
|
|
|
|
sd = target->ci_dentry->d_fsdata;
|
|
|
|
BUG_ON(sd->s_dependent_count < 1);
|
|
|
|
|
|
|
|
sd->s_dependent_count -= 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* After this unlock, we cannot trust the item to stay alive!
|
|
|
|
* DO NOT REFERENCE item after this unlock.
|
|
|
|
*/
|
2009-01-29 02:18:33 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-19 09:06:09 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(configfs_undepend_item);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2011-07-26 13:41:39 +08:00
|
|
|
/*
 * configfs_mkdir - ->mkdir handler for configfs directories.
 * @dir:    inode of the parent directory (not used here).
 * @dentry: dentry of the directory being created; its d_parent identifies
 *          the parent config item.
 * @mode:   requested mode (not used here).
 *
 * Asks the parent's ct_group_ops to build a new group (make_group) or,
 * failing that, a new item (make_item), links it under the parent and
 * attaches it to @dentry.
 *
 * Returns 0 on success.  On success the module owning the subsystem and the
 * module owning the new item both stay pinned; configfs_rmdir() drops them.
 * Errors:
 *   -ENOENT  parent dirent is not ready (still being attached);
 *   -EPERM   parent is not a CONFIGFS_USET_DIR dirent, or its type offers
 *            neither make_group nor make_item;
 *   -EINVAL  subsystem or new item has no ci_type, or a module could not be
 *            pinned;
 *   -ENOMEM  name allocation failed, or make_group/make_item returned NULL;
 *   otherwise the error encoded by make_group/make_item, or returned by
 *   configfs_attach_group()/configfs_attach_item().
 */
static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct configfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
	struct config_item *parent_item;
	struct config_group *parent_group;
	struct configfs_subsystem *subsys;
	struct config_item_type *parent_type, *item_type;
	struct config_group *new_group = NULL;
	struct config_item *new_item = NULL;
	struct module *subsys_owner = NULL, *new_item_owner = NULL;
	int item_owner_pinned = 0;
	char *name_copy;
	int err = 0;

	/*
	 * Pretend the directory does not exist while it is part of a
	 * group/default-groups hierarchy that is still being attached.
	 */
	if (!configfs_dirent_is_ready(parent_sd))
		return -ENOENT;

	if (!(parent_sd->s_type & CONFIGFS_USET_DIR))
		return -EPERM;

	/* Working reference on the parent, held until we leave. */
	parent_item = configfs_get_config_item(dentry->d_parent);
	parent_type = parent_item->ci_type;
	parent_group = to_config_group(parent_item);
	subsys = parent_group->cg_subsys;
	BUG_ON(!subsys);

	/* A parent that cannot make children reports -EPERM. */
	if (!parent_type || !parent_type->ct_group_ops ||
	    (!parent_type->ct_group_ops->make_group &&
	     !parent_type->ct_group_ops->make_item)) {
		err = -EPERM;
		goto out_put;
	}

	/*
	 * The subsystem may be owned by a different module than the item
	 * about to be created, so pin the subsystem's module first: pinning
	 * the new item alone would leave the subsystem it sits under
	 * unprotected.
	 */
	if (!subsys->su_group.cg_item.ci_type) {
		err = -EINVAL;
		goto out_put;
	}
	subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
	if (!try_module_get(subsys_owner)) {
		err = -EINVAL;
		goto out_put;
	}

	/* NUL-terminated private copy of the new entry's name. */
	name_copy = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
	if (!name_copy) {
		err = -ENOMEM;
		goto out_subsys_put;
	}
	snprintf(name_copy, dentry->d_name.len + 1, "%s", dentry->d_name.name);

	/* Build the child and link it to the parent under su_mutex. */
	mutex_lock(&subsys->su_mutex);
	if (parent_type->ct_group_ops->make_group) {
		new_group = parent_type->ct_group_ops->make_group(parent_group,
								  name_copy);
		if (!new_group) {
			err = -ENOMEM;
		} else if (IS_ERR(new_group)) {
			err = PTR_ERR(new_group);
		} else {
			link_group(parent_group, new_group);
			new_item = &new_group->cg_item;
		}
	} else {
		new_item = parent_type->ct_group_ops->make_item(parent_group,
								name_copy);
		if (!new_item)
			err = -ENOMEM;
		else if (IS_ERR(new_item))
			err = PTR_ERR(new_item);
		else
			link_obj(parent_item, new_item);
	}
	mutex_unlock(&subsys->su_mutex);

	kfree(name_copy);

	/*
	 * On failure nothing was linked, so there are no extra references
	 * to undo beyond the subsystem module pin.
	 */
	if (err)
		goto out_subsys_put;

	/*
	 * From here on link_obj() has run (through link_group() for groups),
	 * so every error path has to unlink the child again.
	 */
	item_type = new_item->ci_type;
	if (!item_type) {
		err = -EINVAL;
		goto out_unlink;
	}

	new_item_owner = item_type->ct_owner;
	if (!try_module_get(new_item_owner)) {
		err = -EINVAL;
		goto out_unlink;
	}

	/*
	 * Remember that the pin was taken: on error the matching
	 * module_put() should only happen after the rest of the cleanup.
	 */
	item_owner_pinned = 1;

	/*
	 * Make a racing rmdir() fail if it did not already tag the parent
	 * with CONFIGFS_USET_DROPPING: setting CONFIGFS_USET_IN_MKDIR makes
	 * configfs_detach_prep() fail.  If CONFIGFS_USET_DROPPING is already
	 * set, the attach below fails and rmdir() terminates correctly.
	 */
	spin_lock(&configfs_dirent_lock);
	parent_sd->s_type |= CONFIGFS_USET_IN_MKDIR;
	spin_unlock(&configfs_dirent_lock);

	err = new_group ? configfs_attach_group(parent_item, new_item, dentry)
			: configfs_attach_item(parent_item, new_item, dentry);

	spin_lock(&configfs_dirent_lock);
	parent_sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
	if (!err)
		configfs_dir_set_ready(dentry->d_fsdata);
	spin_unlock(&configfs_dirent_lock);

	/* Success: keep both module pins, only drop the working reference. */
	if (!err)
		goto out_put;

out_unlink:
	/* Error after linking: tear down everything that was built up. */
	mutex_lock(&subsys->su_mutex);

	client_disconnect_notify(parent_item, new_item);
	if (new_group)
		unlink_group(new_group);
	else
		unlink_obj(new_item);
	client_drop_item(parent_item, new_item);

	mutex_unlock(&subsys->su_mutex);

	if (item_owner_pinned)
		module_put(new_item_owner);

out_subsys_put:
	/* Only reached on error: release the subsystem module pin. */
	module_put(subsys_owner);

out_put:
	/*
	 * link_obj()/link_group() took a child->parent reference, so the
	 * parent stays pinned; the working reference can be dropped.
	 */
	config_item_put(parent_item);

	return err;
}
|
|
|
|
|
|
|
|
static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct config_item *parent_item;
|
|
|
|
struct config_item *item;
|
|
|
|
struct configfs_subsystem *subsys;
|
|
|
|
struct configfs_dirent *sd;
|
2008-06-18 06:34:32 +08:00
|
|
|
struct module *subsys_owner = NULL, *dead_item_owner = NULL;
|
2005-12-16 06:29:43 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
sd = dentry->d_fsdata;
|
|
|
|
if (sd->s_type & CONFIGFS_USET_DEFAULT)
|
|
|
|
return -EPERM;
|
|
|
|
|
2006-03-28 10:46:09 +08:00
|
|
|
/* Get a working ref until we have the child */
|
2005-12-16 06:29:43 +08:00
|
|
|
parent_item = configfs_get_config_item(dentry->d_parent);
|
|
|
|
subsys = to_config_group(parent_item)->cg_subsys;
|
|
|
|
BUG_ON(!subsys);
|
|
|
|
|
|
|
|
if (!parent_item->ci_type) {
|
|
|
|
config_item_put(parent_item);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2008-06-18 06:34:32 +08:00
|
|
|
/* configfs_mkdir() shouldn't have allowed this */
|
|
|
|
BUG_ON(!subsys->su_group.cg_item.ci_type);
|
|
|
|
subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
|
|
|
|
|
2008-06-20 20:09:22 +08:00
|
|
|
/*
|
|
|
|
* Ensure that no racing symlink() will make detach_prep() fail while
|
|
|
|
* the new link is temporarily attached
|
|
|
|
*/
|
2008-06-17 01:01:02 +08:00
|
|
|
do {
|
|
|
|
struct mutex *wait_mutex;
|
|
|
|
|
2008-08-16 03:37:23 +08:00
|
|
|
mutex_lock(&configfs_symlink_mutex);
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
2009-01-29 02:18:33 +08:00
|
|
|
/*
|
|
|
|
* Here's where we check for dependents. We're protected by
|
|
|
|
* configfs_dirent_lock.
|
|
|
|
* If no dependent, atomically tag the item as dropping.
|
|
|
|
*/
|
|
|
|
ret = sd->s_dependent_count ? -EBUSY : 0;
|
|
|
|
if (!ret) {
|
|
|
|
ret = configfs_detach_prep(dentry, &wait_mutex);
|
|
|
|
if (ret)
|
|
|
|
configfs_detach_rollback(dentry);
|
|
|
|
}
|
2008-08-16 03:37:23 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
mutex_unlock(&configfs_symlink_mutex);
|
|
|
|
|
|
|
|
if (ret) {
|
2008-06-17 01:01:02 +08:00
|
|
|
if (ret != -EAGAIN) {
|
|
|
|
config_item_put(parent_item);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wait until the racing operation terminates */
|
|
|
|
mutex_lock(wait_mutex);
|
|
|
|
mutex_unlock(wait_mutex);
|
|
|
|
}
|
|
|
|
} while (ret == -EAGAIN);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2006-03-28 10:46:09 +08:00
|
|
|
/* Get a working ref for the duration of this function */
|
2005-12-16 06:29:43 +08:00
|
|
|
item = configfs_get_config_item(dentry);
|
|
|
|
|
|
|
|
/* Drop reference from above, item already holds one. */
|
|
|
|
config_item_put(parent_item);
|
|
|
|
|
|
|
|
if (item->ci_type)
|
2008-06-18 06:34:32 +08:00
|
|
|
dead_item_owner = item->ci_type->ct_owner;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
if (sd->s_type & CONFIGFS_USET_DIR) {
|
|
|
|
configfs_detach_group(item);
|
|
|
|
|
2007-07-07 14:33:17 +08:00
|
|
|
mutex_lock(&subsys->su_mutex);
|
2006-10-07 08:33:23 +08:00
|
|
|
client_disconnect_notify(parent_item, item);
|
2005-12-16 06:29:43 +08:00
|
|
|
unlink_group(to_config_group(item));
|
|
|
|
} else {
|
|
|
|
configfs_detach_item(item);
|
|
|
|
|
2007-07-07 14:33:17 +08:00
|
|
|
mutex_lock(&subsys->su_mutex);
|
2006-10-07 08:33:23 +08:00
|
|
|
client_disconnect_notify(parent_item, item);
|
2005-12-16 06:29:43 +08:00
|
|
|
unlink_obj(item);
|
|
|
|
}
|
|
|
|
|
|
|
|
client_drop_item(parent_item, item);
|
2007-07-07 14:33:17 +08:00
|
|
|
mutex_unlock(&subsys->su_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
/* Drop our reference from above */
|
|
|
|
config_item_put(item);
|
|
|
|
|
2008-06-18 06:34:32 +08:00
|
|
|
module_put(dead_item_owner);
|
|
|
|
module_put(subsys_owner);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-02-12 16:55:38 +08:00
|
|
|
const struct inode_operations configfs_dir_inode_operations = {
|
2005-12-16 06:29:43 +08:00
|
|
|
.mkdir = configfs_mkdir,
|
|
|
|
.rmdir = configfs_rmdir,
|
|
|
|
.symlink = configfs_symlink,
|
|
|
|
.unlink = configfs_unlink,
|
|
|
|
.lookup = configfs_lookup,
|
2006-01-26 05:31:07 +08:00
|
|
|
.setattr = configfs_setattr,
|
2005-12-16 06:29:43 +08:00
|
|
|
};
|
|
|
|
|
2012-03-18 04:13:25 +08:00
|
|
|
const struct inode_operations configfs_root_inode_operations = {
|
|
|
|
.lookup = configfs_lookup,
|
|
|
|
.setattr = configfs_setattr,
|
|
|
|
};
|
|
|
|
|
2005-12-16 06:29:43 +08:00
|
|
|
#if 0
/*
 * configfs_rename_dir - rename the directory backing @item to @new_name.
 *
 * Compiled out (#if 0); kept for reference only.
 *
 * Returns -EINVAL if @new_name equals the current name or @item has no
 * parent, -EEXIST if the target name already has an inode, or the error
 * from config_item_set_name().
 *
 * NOTE(review): when lookup_one_len() fails, error is left at 0 and the
 * function reports success.
 * NOTE(review): this uses item->parent and item->dentry, while the live code
 * in this file goes through ci_dentry -- presumably stale field names;
 * confirm before re-enabling.
 */
int configfs_rename_dir(struct config_item * item, const char *new_name)
{
	struct dentry *parent_dentry;
	struct dentry *target;
	int error = 0;

	if (!strcmp(config_item_name(item), new_name) || !item->parent)
		return -EINVAL;

	down_write(&configfs_rename_sem);
	parent_dentry = item->parent->dentry;

	mutex_lock(&parent_dentry->d_inode->i_mutex);

	target = lookup_one_len(new_name, parent_dentry, strlen(new_name));
	if (IS_ERR(target))
		goto unlock;

	if (target->d_inode) {
		/* Something already lives under the new name. */
		error = -EEXIST;
	} else {
		error = config_item_set_name(item, "%s", new_name);
		if (error) {
			d_delete(target);
		} else {
			d_add(target, NULL);
			d_move(item->dentry, target);
		}
	}
	dput(target);

unlock:
	mutex_unlock(&parent_dentry->d_inode->i_mutex);
	up_write(&configfs_rename_sem);

	return error;
}
#endif
|
|
|
|
|
|
|
|
static int configfs_dir_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
2006-12-08 18:36:47 +08:00
|
|
|
struct dentry * dentry = file->f_path.dentry;
|
2005-12-16 06:29:43 +08:00
|
|
|
struct configfs_dirent * parent_sd = dentry->d_fsdata;
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
int err;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2006-01-10 07:59:24 +08:00
|
|
|
mutex_lock(&dentry->d_inode->i_mutex);
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A/B")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parents are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
/*
|
|
|
|
* Fake invisibility if dir belongs to a group/default groups hierarchy
|
|
|
|
* being attached
|
|
|
|
*/
|
|
|
|
err = -ENOENT;
|
|
|
|
if (configfs_dirent_is_ready(parent_sd)) {
|
2009-01-29 02:18:33 +08:00
|
|
|
file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A/B")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parents are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
if (IS_ERR(file->private_data))
|
|
|
|
err = PTR_ERR(file->private_data);
|
|
|
|
else
|
|
|
|
err = 0;
|
|
|
|
}
|
2006-01-10 07:59:24 +08:00
|
|
|
mutex_unlock(&dentry->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A/B")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parents are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
return err;
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int configfs_dir_close(struct inode *inode, struct file *file)
|
|
|
|
{
|
2006-12-08 18:36:47 +08:00
|
|
|
struct dentry * dentry = file->f_path.dentry;
|
2005-12-16 06:29:43 +08:00
|
|
|
struct configfs_dirent * cursor = file->private_data;
|
|
|
|
|
2006-01-10 07:59:24 +08:00
|
|
|
mutex_lock(&dentry->d_inode->i_mutex);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2005-12-16 06:29:43 +08:00
|
|
|
list_del_init(&cursor->s_sibling);
|
2008-06-17 01:00:58 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2006-01-10 07:59:24 +08:00
|
|
|
mutex_unlock(&dentry->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
release_configfs_dirent(cursor);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Relationship between s_mode and the DT_xxx types */
|
|
|
|
static inline unsigned char dt_type(struct configfs_dirent *sd)
|
|
|
|
{
|
|
|
|
return (sd->s_mode >> 12) & 15;
|
|
|
|
}
|
|
|
|
|
2013-05-16 13:28:34 +08:00
|
|
|
static int configfs_readdir(struct file *file, struct dir_context *ctx)
|
2005-12-16 06:29:43 +08:00
|
|
|
{
|
2013-05-16 13:28:34 +08:00
|
|
|
struct dentry *dentry = file->f_path.dentry;
|
2012-03-18 04:24:54 +08:00
|
|
|
struct super_block *sb = dentry->d_sb;
|
2005-12-16 06:29:43 +08:00
|
|
|
struct configfs_dirent * parent_sd = dentry->d_fsdata;
|
2013-05-16 13:28:34 +08:00
|
|
|
struct configfs_dirent *cursor = file->private_data;
|
2005-12-16 06:29:43 +08:00
|
|
|
struct list_head *p, *q = &cursor->s_sibling;
|
2011-05-18 19:08:16 +08:00
|
|
|
ino_t ino = 0;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2013-05-16 13:28:34 +08:00
|
|
|
if (!dir_emit_dots(file, ctx))
|
|
|
|
return 0;
|
|
|
|
if (ctx->pos == 2) {
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
list_move(q, &parent_sd->s_children);
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
}
|
|
|
|
for (p = q->next; p != &parent_sd->s_children; p = p->next) {
|
|
|
|
struct configfs_dirent *next;
|
|
|
|
const char *name;
|
|
|
|
int len;
|
|
|
|
struct inode *inode = NULL;
|
|
|
|
|
|
|
|
next = list_entry(p, struct configfs_dirent, s_sibling);
|
|
|
|
if (!next->s_element)
|
|
|
|
continue;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2013-05-16 13:28:34 +08:00
|
|
|
name = configfs_get_name(next);
|
|
|
|
len = strlen(name);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We'll have a dentry and an inode for
|
|
|
|
* PINNED items and for open attribute
|
|
|
|
* files. We lock here to prevent a race
|
|
|
|
* with configfs_d_iput() clearing
|
|
|
|
* s_dentry before calling iput().
|
|
|
|
*
|
|
|
|
* Why do we go to the trouble? If
|
|
|
|
* someone has an attribute file open,
|
|
|
|
* the inode number should match until
|
|
|
|
* they close it. Beyond that, we don't
|
|
|
|
* care.
|
|
|
|
*/
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
dentry = next->s_dentry;
|
|
|
|
if (dentry)
|
|
|
|
inode = dentry->d_inode;
|
|
|
|
if (inode)
|
|
|
|
ino = inode->i_ino;
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
if (!inode)
|
|
|
|
ino = iunique(sb, 2);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2013-05-16 13:28:34 +08:00
|
|
|
if (!dir_emit(ctx, name, len, ino, dt_type(next)))
|
|
|
|
return 0;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2013-05-16 13:28:34 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
list_move(q, p);
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
|
|
|
p = q;
|
|
|
|
ctx->pos++;
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-12-18 07:59:39 +08:00
|
|
|
/*
 * Reposition a configfs directory stream.
 *
 * Only SEEK_SET and SEEK_CUR are supported; any other @whence, or a
 * resulting negative offset, yields -EINVAL.  On success the new offset is
 * stored in file->f_pos and returned.
 *
 * Offsets 0 and 1 stand for "." and "..".  For an offset >= 2 the per-open
 * cursor dirent (file->private_data) is re-inserted into the parent's
 * s_children list so that (offset - 2) dirents with an element precede it;
 * dirents without an element (the cursors are created that way at open
 * time) are stepped over without being counted.
 *
 * The whole operation runs under the directory's i_mutex.
 */
static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
{
	struct dentry *dentry = file->f_path.dentry;
	/* Lock and unlock through the very same mutex pointer on every path. */
	struct mutex *dir_mutex = &dentry->d_inode->i_mutex;

	mutex_lock(dir_mutex);
	switch (whence) {
	case SEEK_CUR:
		offset += file->f_pos;
		/* fall through */
	case SEEK_SET:
		if (offset >= 0)
			break;
		/* fall through: negative offsets are rejected below */
	default:
		mutex_unlock(dir_mutex);
		return -EINVAL;
	}

	if (offset != file->f_pos) {
		file->f_pos = offset;
		if (file->f_pos >= 2) {
			struct configfs_dirent *sd = dentry->d_fsdata;
			struct configfs_dirent *cursor = file->private_data;
			struct list_head *p;
			loff_t n = file->f_pos - 2;

			spin_lock(&configfs_dirent_lock);
			list_del(&cursor->s_sibling);
			p = sd->s_children.next;
			while (n && p != &sd->s_children) {
				struct configfs_dirent *next;

				next = list_entry(p, struct configfs_dirent,
						  s_sibling);
				/* Only dirents with an element count as entries. */
				if (next->s_element)
					n--;
				p = p->next;
			}
			/* Insert the cursor just before the entry to read next. */
			list_add_tail(&cursor->s_sibling, p);
			spin_unlock(&configfs_dirent_lock);
		}
	}
	mutex_unlock(dir_mutex);
	return offset;
}
|
|
|
|
|
2006-03-28 17:56:42 +08:00
|
|
|
const struct file_operations configfs_dir_operations = {
|
2005-12-16 06:29:43 +08:00
|
|
|
.open = configfs_dir_open,
|
|
|
|
.release = configfs_dir_close,
|
|
|
|
.llseek = configfs_dir_lseek,
|
|
|
|
.read = generic_read_dir,
|
2013-05-16 13:28:34 +08:00
|
|
|
.iterate = configfs_readdir,
|
2005-12-16 06:29:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int configfs_register_subsystem(struct configfs_subsystem *subsys)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
struct config_group *group = &subsys->su_group;
|
|
|
|
struct dentry *dentry;
|
2012-03-18 04:24:54 +08:00
|
|
|
struct dentry *root;
|
2005-12-16 06:29:43 +08:00
|
|
|
struct configfs_dirent *sd;
|
|
|
|
|
2012-03-18 04:53:29 +08:00
|
|
|
root = configfs_pin_fs();
|
|
|
|
if (IS_ERR(root))
|
|
|
|
return PTR_ERR(root);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
if (!group->cg_item.ci_name)
|
|
|
|
group->cg_item.ci_name = group->cg_item.ci_namebuf;
|
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
sd = root->d_fsdata;
|
2005-12-16 06:29:43 +08:00
|
|
|
link_group(to_config_group(sd->s_element), group);
|
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
err = -ENOMEM;
|
2013-07-14 21:16:52 +08:00
|
|
|
dentry = d_alloc_name(root, group->cg_item.ci_name);
|
2007-03-06 07:49:49 +08:00
|
|
|
if (dentry) {
|
|
|
|
d_add(dentry, NULL);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2007-03-06 07:49:49 +08:00
|
|
|
err = configfs_attach_group(sd->s_element, &group->cg_item,
|
|
|
|
dentry);
|
|
|
|
if (err) {
|
2011-02-22 17:09:49 +08:00
|
|
|
BUG_ON(dentry->d_inode);
|
|
|
|
d_drop(dentry);
|
2007-03-06 07:49:49 +08:00
|
|
|
dput(dentry);
|
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 22:56:05 +08:00
|
|
|
} else {
|
|
|
|
spin_lock(&configfs_dirent_lock);
|
|
|
|
configfs_dir_set_ready(dentry->d_fsdata);
|
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2007-03-06 07:49:49 +08:00
|
|
|
}
|
|
|
|
}
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_unlock(&root->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2007-03-06 07:49:49 +08:00
|
|
|
if (err) {
|
|
|
|
unlink_group(group);
|
|
|
|
configfs_release_fs();
|
2005-12-16 06:29:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
|
|
|
|
{
|
|
|
|
struct config_group *group = &subsys->su_group;
|
|
|
|
struct dentry *dentry = group->cg_item.ci_dentry;
|
2012-03-18 04:24:54 +08:00
|
|
|
struct dentry *root = dentry->d_sb->s_root;
|
2005-12-16 06:29:43 +08:00
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
if (dentry->d_parent != root) {
|
2005-12-16 06:29:43 +08:00
|
|
|
printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_lock_nested(&root->d_inode->i_mutex,
|
2006-10-21 05:55:54 +08:00
|
|
|
I_MUTEX_PARENT);
|
|
|
|
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
|
2008-06-20 20:09:22 +08:00
|
|
|
mutex_lock(&configfs_symlink_mutex);
|
2008-06-17 01:01:01 +08:00
|
|
|
spin_lock(&configfs_dirent_lock);
|
2008-06-17 01:01:02 +08:00
|
|
|
if (configfs_detach_prep(dentry, NULL)) {
|
2005-12-16 06:29:43 +08:00
|
|
|
printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
|
|
|
|
}
|
2008-06-17 01:01:01 +08:00
|
|
|
spin_unlock(&configfs_dirent_lock);
|
2008-06-20 20:09:22 +08:00
|
|
|
mutex_unlock(&configfs_symlink_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
configfs_detach_group(&group->cg_item);
|
|
|
|
dentry->d_inode->i_flags |= S_DEAD;
|
2010-05-01 05:17:09 +08:00
|
|
|
dont_mount(dentry);
|
2006-01-10 07:59:24 +08:00
|
|
|
mutex_unlock(&dentry->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
d_delete(dentry);
|
|
|
|
|
2012-03-18 04:24:54 +08:00
|
|
|
mutex_unlock(&root->d_inode->i_mutex);
|
2005-12-16 06:29:43 +08:00
|
|
|
|
|
|
|
dput(dentry);
|
|
|
|
|
|
|
|
unlink_group(group);
|
|
|
|
configfs_release_fs();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Export the subsystem registration entry point. */
EXPORT_SYMBOL(configfs_register_subsystem);
|
|
|
|
/* Export the subsystem unregistration entry point. */
EXPORT_SYMBOL(configfs_unregister_subsystem);
|