diff --git a/fs/mount.h b/fs/mount.h index 2c856fc47ae3..2826543a131d 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt) } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); -extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); extern bool legitimize_mnt(struct vfsmount *, unsigned); diff --git a/fs/namespace.c b/fs/namespace.c index 089a6b23135a..8bfad42c1ccf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -636,28 +636,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) return NULL; } -/* - * find the last mount at @dentry on vfsmount @mnt. - * mount_lock must be held. - */ -struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) -{ - struct mount *p, *res = NULL; - p = __lookup_mnt(mnt, dentry); - if (!p) - goto out; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - hlist_for_each_entry_continue(p, mnt_hash) { - if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) - break; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - } -out: - return res; -} - /* * lookup_mnt - Return the first child mount mounted at path * @@ -878,6 +856,13 @@ void mnt_set_mountpoint(struct mount *mnt, hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } +static void __attach_mnt(struct mount *mnt, struct mount *parent) +{ + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); +} + /* * vfsmount lock must be held for write */ @@ -886,28 +871,45 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + __attach_mnt(mnt, parent); } -static void attach_shadowed(struct mount *mnt, - struct mount *parent, - struct mount *shadows) +void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) { - if (shadows) { - hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); - list_add(&mnt->mnt_child, &shadows->mnt_child); - } else { - hlist_add_head_rcu(&mnt->mnt_hash, - m_hash(&parent->mnt, mnt->mnt_mountpoint)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - } + struct mountpoint *old_mp = mnt->mnt_mp; + struct dentry *old_mountpoint = mnt->mnt_mountpoint; + struct mount *old_parent = mnt->mnt_parent; + + list_del_init(&mnt->mnt_child); + hlist_del_init(&mnt->mnt_mp_list); + hlist_del_init_rcu(&mnt->mnt_hash); + + attach_mnt(mnt, parent, mp); + + put_mountpoint(old_mp); + + /* + * Safely avoid even the suggestion this code might sleep or + * lock the mount hash by taking advantage of the knowledge that + * mnt_change_mountpoint will not release the final reference + * to a mountpoint. + * + * During mounting, the mount passed in as the parent mount will + * continue to use the old mountpoint and during unmounting, the + * old mountpoint will continue to exist until namespace_unlock, + * which happens well after mnt_change_mountpoint. + */ + spin_lock(&old_mountpoint->d_lock); + old_mountpoint->d_lockref.count--; + spin_unlock(&old_mountpoint->d_lock); + + mnt_add_count(old_parent, -1); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt, struct mount *shadows) +static void commit_tree(struct mount *mnt) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -925,7 +927,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) n->mounts += n->pending_mounts; n->pending_mounts = 0; - attach_shadowed(mnt, parent, shadows); + __attach_mnt(mnt, parent); touch_mnt_namespace(n); } @@ -1779,7 +1781,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, continue; for (s = r; s; s = next_mnt(s, r)) { - struct mount *t = NULL; if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { s = skip_mnt_tree(s); @@ -1801,14 +1802,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, goto out; lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); - mnt_set_mountpoint(parent, p->mnt_mp, q); - if (!list_empty(&parent->mnt_mounts)) { - t = list_last_entry(&parent->mnt_mounts, - struct mount, mnt_child); - if (t->mnt_mp != p->mnt_mp) - t = NULL; - } - attach_shadowed(q, parent, t); + attach_mnt(q, parent, p->mnt_mp); unlock_mount_hash(); } } @@ -2007,10 +2001,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, { HLIST_HEAD(tree_list); struct mnt_namespace *ns = dest_mnt->mnt_ns; + struct mountpoint *smp; struct mount *child, *p; struct hlist_node *n; int err; + /* Preallocate a mountpoint in case the new mounts need + * to be tucked under other mounts. + */ + smp = get_mountpoint(source_mnt->mnt.mnt_root); + if (IS_ERR(smp)) + return PTR_ERR(smp); + /* Is there space to add these mounts to the mount namespace? */ if (!parent_path) { err = count_mounts(ns, source_mnt); @@ -2037,16 +2039,19 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt, NULL); + commit_tree(source_mnt); } hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt_last(&child->mnt_parent->mnt, - child->mnt_mountpoint); - commit_tree(child, q); + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); + commit_tree(child); } + put_mountpoint(smp); unlock_mount_hash(); return 0; @@ -2061,6 +2066,11 @@ static int attach_recursive_mnt(struct mount *source_mnt, cleanup_group_ids(source_mnt, NULL); out: ns->pending_mounts = 0; + + read_seqlock_excl(&mount_lock); + put_mountpoint(smp); + read_sequnlock_excl(&mount_lock); + return err; } diff --git a/fs/pnode.c b/fs/pnode.c index 06a793f4ae38..5bc7896d122a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -322,6 +322,21 @@ out: return ret; } +static struct mount *find_topper(struct mount *mnt) +{ + /* If there is exactly one mount covering mnt completely return it. */ + struct mount *child; + + if (!list_is_singular(&mnt->mnt_mounts)) + return NULL; + + child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child); + if (child->mnt_mountpoint != mnt->mnt.mnt_root) + return NULL; + + return child; +} + /* * return true if the refcount is greater than count */ @@ -342,9 +357,8 @@ static inline int do_refcount_check(struct mount *mnt, int count) */ int propagate_mount_busy(struct mount *mnt, int refcnt) { - struct mount *m, *child; + struct mount *m, *child, *topper; struct mount *parent = mnt->mnt_parent; - int ret = 0; if (mnt == parent) return do_refcount_check(mnt, refcnt); @@ -359,12 +373,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); - if (child && list_empty(&child->mnt_mounts) && - (ret = do_refcount_check(child, 1))) - break; + int count = 1; + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); + if (!child) + continue; + + /* Is there exactly one mount on the child that covers + * it completely whose reference should be ignored? + */ + topper = find_topper(child); + if (topper) + count += 1; + else if (!list_empty(&child->mnt_mounts)) + continue; + + if (do_refcount_check(child, count)) + return 1; } - return ret; + return 0; } /* @@ -381,7 +407,7 @@ void propagate_mount_unlock(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); if (child) child->mnt.mnt_flags &= ~MNT_LOCKED; } @@ -399,9 +425,11 @@ static void mark_umount_candidates(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt_last(&m->mnt, + struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); - if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) { + if (!child || (child->mnt.mnt_flags & MNT_UMOUNT)) + continue; + if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) { SET_MNT_MARK(child); } } @@ -420,8 +448,8 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - - struct mount *child = __lookup_mnt_last(&m->mnt, + struct mount *topper; + struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); /* * umount the child only if the child has no children @@ -430,6 +458,15 @@ static void __propagate_umount(struct mount *mnt) if (!child || !IS_MNT_MARKED(child)) continue; CLEAR_MNT_MARK(child); + + /* If there is exactly one mount covering all of child + * replace child with that mount. + */ + topper = find_topper(child); + if (topper) + mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, + topper); + if (list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); child->mnt.mnt_flags |= MNT_UMOUNT; diff --git a/fs/pnode.h b/fs/pnode.h index 550f5a8b4fcf..dc87e65becd2 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); +void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, + struct mount *mnt); struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root);