2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-29 23:53:55 +08:00
linux-next/fs/notify/fanotify/fanotify.c
Amir Goldstein 83b535d289 fanotify: support events with data type FSNOTIFY_EVENT_INODE
When event data type is FSNOTIFY_EVENT_INODE, we don't have a refernece
to the mount, so we will not be able to open a file descriptor when user
reads the event. However, if the listener has enabled reporting file
identifier with the FAN_REPORT_FID init flag, we allow reporting those
events and we use an identifier inode to encode fid.

The inode to use as identifier when reporting fid depends on the event.
For dirent modification events, we report the modified directory inode
and we report the "victim" inode otherwise.
For example:
FS_ATTRIB reports the child inode even if reported on a watched parent.
FS_CREATE reports the modified dir inode and not the created inode.

[JK: Fixup condition in fanotify_group_event_mask()]

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2019-02-07 16:38:36 +01:00

421 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/sched/user.h>
#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/audit.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include "fanotify.h"
static bool should_merge(struct fsnotify_event *old_fsn,
struct fsnotify_event *new_fsn)
{
struct fanotify_event *old, *new;
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
if (old_fsn->inode != new_fsn->inode || old->pid != new->pid ||
old->fh_type != new->fh_type || old->fh_len != new->fh_len)
return false;
if (fanotify_event_has_path(old)) {
return old->path.mnt == new->path.mnt &&
old->path.dentry == new->path.dentry;
} else if (fanotify_event_has_fid(old)) {
return fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
}
/* Do not merge events if we failed to encode fid */
return false;
}
/* and the list better be locked by something too! */
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
struct fanotify_event *new;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
new = FANOTIFY_E(event);
/*
* Don't merge a permission event with any other event so that we know
* the event structure we have created in fanotify_handle_event() is the
* one we should check for permission response.
*/
if (fanotify_is_perm_event(new->mask))
return 0;
list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) {
FANOTIFY_E(test_event)->mask |= new->mask;
return 1;
}
}
return 0;
}
static int fanotify_get_response(struct fsnotify_group *group,
struct fanotify_perm_event *event,
struct fsnotify_iter_info *iter_info)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response & ~FAN_AUDIT) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
/* Check if the response should be audited */
if (event->response & FAN_AUDIT)
audit_fanotify(event->response & ~FAN_AUDIT);
event->response = 0;
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
/*
* This function returns a mask for an event that only contains the flags
* that have been specifically requested by the user. Flags that may have
* been included within the event mask, but have not been explicitly
* requested by the user, will not be present in the returned mask.
*/
static u32 fanotify_group_event_mask(struct fsnotify_group *group,
struct fsnotify_iter_info *iter_info,
u32 event_mask, const void *data,
int data_type)
{
__u32 marks_mask = 0, marks_ignored_mask = 0;
const struct path *path = data;
struct fsnotify_mark *mark;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
/* Do we have path to open a file descriptor? */
if (data_type != FSNOTIFY_EVENT_PATH)
return 0;
/* Path type events are only relevant for files and dirs */
if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry))
return 0;
}
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
mark = iter_info->marks[type];
/*
* If the event is for a child and this mark doesn't care about
* events on a child, don't send it!
*/
if (event_mask & FS_EVENT_ON_CHILD &&
(type != FSNOTIFY_OBJ_TYPE_INODE ||
!(mark->mask & FS_EVENT_ON_CHILD)))
continue;
marks_mask |= mark->mask;
marks_ignored_mask |= mark->ignored_mask;
}
if (event_mask & FS_ISDIR &&
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return 0;
return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask &
~marks_ignored_mask;
}
static int fanotify_encode_fid(struct fanotify_event *event,
struct inode *inode, gfp_t gfp,
__kernel_fsid_t *fsid)
{
struct fanotify_fid *fid = &event->fid;
int dwords, bytes = 0;
int err, type;
fid->ext_fh = NULL;
dwords = 0;
err = -ENOENT;
type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
if (!dwords)
goto out_err;
bytes = dwords << 2;
if (bytes > FANOTIFY_INLINE_FH_LEN) {
/* Treat failure to allocate fh as failure to allocate event */
err = -ENOMEM;
fid->ext_fh = kmalloc(bytes, gfp);
if (!fid->ext_fh)
goto out_err;
}
type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
&dwords, NULL);
err = -EINVAL;
if (!type || type == FILEID_INVALID || bytes != dwords << 2)
goto out_err;
fid->fsid = *fsid;
event->fh_len = bytes;
return type;
out_err:
pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
"type=%d, bytes=%d, err=%i)\n",
fsid->val[0], fsid->val[1], type, bytes, err);
kfree(fid->ext_fh);
fid->ext_fh = NULL;
event->fh_len = 0;
return FILEID_INVALID;
}
/*
* The inode to use as identifier when reporting fid depends on the event.
* Report the modified directory inode on dirent modification events.
* Report the "victim" inode otherwise.
* For example:
* FS_ATTRIB reports the child inode even if reported on a watched parent.
* FS_CREATE reports the modified dir inode and not the created inode.
*/
static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask,
const void *data, int data_type)
{
if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
return to_tell;
else if (data_type == FSNOTIFY_EVENT_INODE)
return (struct inode *)data;
else if (data_type == FSNOTIFY_EVENT_PATH)
return d_inode(((struct path *)data)->dentry);
return NULL;
}
struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *inode, u32 mask,
const void *data, int data_type,
__kernel_fsid_t *fsid)
{
struct fanotify_event *event = NULL;
gfp_t gfp = GFP_KERNEL_ACCOUNT;
struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
/*
* For queues with unlimited length lost events are not expected and
* can possibly have security implications. Avoid losing events when
* memory is short.
*/
if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL;
/* Whoever is interested in the event, pays for the allocation. */
memalloc_use_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
if (!pevent)
goto out;
event = &pevent->fae;
pevent->response = 0;
goto init;
}
event = kmem_cache_alloc(fanotify_event_cachep, gfp);
if (!event)
goto out;
init: __maybe_unused
fsnotify_init_event(&event->fse, inode);
event->mask = mask;
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
event->pid = get_pid(task_pid(current));
else
event->pid = get_pid(task_tgid(current));
event->fh_len = 0;
if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
/* Report the event without a file identifier on encode error */
event->fh_type = fanotify_encode_fid(event, id, gfp, fsid);
} else if (data_type == FSNOTIFY_EVENT_PATH) {
event->fh_type = FILEID_ROOT;
event->path = *((struct path *)data);
path_get(&event->path);
} else {
event->fh_type = FILEID_INVALID;
event->path.mnt = NULL;
event->path.dentry = NULL;
}
out:
memalloc_unuse_memcg();
return event;
}
/*
* Get cached fsid of the filesystem containing the object from any connector.
* All connectors are supposed to have the same fsid, but we do not verify that
* here.
*/
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{
int type;
__kernel_fsid_t fsid = {};
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
fsid = iter_info->marks[type]->connector->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
}
return fsid;
}
static int fanotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
int ret = 0;
struct fanotify_event *event;
struct fsnotify_event *fsn_event;
__kernel_fsid_t fsid = {};
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12);
mask = fanotify_group_event_mask(group, iter_info, mask, data,
data_type);
if (!mask)
return 0;
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
mask);
if (fanotify_is_perm_event(mask)) {
/*
* fsnotify_prepare_user_wait() fails if we race with mark
* deletion. Just let the operation pass in that case.
*/
if (!fsnotify_prepare_user_wait(iter_info))
return 0;
}
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
fsid = fanotify_get_fsid(iter_info);
event = fanotify_alloc_event(group, inode, mask, data, data_type,
&fsid);
ret = -ENOMEM;
if (unlikely(!event)) {
/*
* We don't queue overflow events for permission events as
* there the access is denied and so no event is in fact lost.
*/
if (!fanotify_is_perm_event(mask))
fsnotify_queue_overflow(group);
goto finish;
}
fsn_event = &event->fse;
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
if (ret) {
/* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
ret = 0;
} else if (fanotify_is_perm_event(mask)) {
ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
iter_info);
fsnotify_destroy_event(group, fsn_event);
}
finish:
if (fanotify_is_perm_event(mask))
fsnotify_finish_user_wait(iter_info);
return ret;
}
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
user = group->fanotify_data.user;
atomic_dec(&user->fanotify_listeners);
free_uid(user);
}
static void fanotify_free_event(struct fsnotify_event *fsn_event)
{
struct fanotify_event *event;
event = FANOTIFY_E(fsn_event);
if (fanotify_event_has_path(event))
path_put(&event->path);
else if (fanotify_event_has_ext_fh(event))
kfree(event->fid.ext_fh);
put_pid(event->pid);
if (fanotify_is_perm_event(event->mask)) {
kmem_cache_free(fanotify_perm_event_cachep,
FANOTIFY_PE(fsn_event));
return;
}
kmem_cache_free(fanotify_event_cachep, event);
}
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
kmem_cache_free(fanotify_mark_cache, fsn_mark);
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.free_group_priv = fanotify_free_group_priv,
.free_event = fanotify_free_event,
.free_mark = fanotify_free_mark,
};