linux/fs/netfs/objects.c
David Howells 7b589a9b45
netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used
correctly.  The problem is that we try to set them up in the request
initialisation, but we the cache may be in the process of setting up still,
and so the state may not be correct.  Further, we secondarily sample the
cache state and make contradictory decisions later.

The issue arises because we set up the cache resources, which allows the
cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which
triggers cache writing even if we didn't set the flags when allocating.

Fix this in the following way:

 (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in
     ->init_request() rather than trying to juggle that in
     netfs_alloc_request().

 (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is
     to be done, then PG_private_2 is to be used rather than only setting
     it if we decide to cache and then having netfs_rreq_unlock_folios()
     set the non-PG_private_2 writeback-to-cache if it wasn't set.

 (3) Split netfs_rreq_unlock_folios() into two functions, one of which
     contains the deprecated code for using PG_private_2 to avoid
     accidentally doing the writeback path - and always use it if
     USE_PGPRIV2 is set.

 (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always
     wait for PG_private_2.  This function is deprecated and only used by
     ceph anyway, and so label it so.

 (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use
     fscache_operation_valid() on the cache_resources instead.  This has
     the advantage of picking up the result of netfs_begin_cache_read() and
     fscache_begin_write_operation() - which are called after the object is
     initialised and will wait for the cache to come to a usable state.

Just reverting ae678317b95e[1] isn't a sufficient fix, so this need to be
applied on top of that.  Without this as well, things like:

 rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {

and:

 WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386

may happen, along with some UAFs due to PG_private_2 not getting used to
wait on writeback completion.

Fixes: 2ff1e97587 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Hristo Venev <hristo@venev.name>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1]
Link: https://lore.kernel.org/r/1173209.1723152682@warthog.procyon.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-08-12 22:03:27 +02:00

230 lines
6.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Object lifetime handling and tracing.
*
* Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/delay.h>
#include "internal.h"
/*
* Allocate an I/O request and initialise it.
*/
struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
struct file *file,
loff_t start, size_t len,
enum netfs_io_origin origin)
{
static atomic_t debug_ids;
struct inode *inode = file ? file_inode(file) : mapping->host;
struct netfs_inode *ctx = netfs_inode(inode);
struct netfs_io_request *rreq;
mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool;
struct kmem_cache *cache = mempool->pool_data;
int ret;
for (;;) {
rreq = mempool_alloc(mempool, GFP_KERNEL);
if (rreq)
break;
msleep(10);
}
memset(rreq, 0, kmem_cache_size(cache));
rreq->start = start;
rreq->len = len;
rreq->upper_len = len;
rreq->origin = origin;
rreq->netfs_ops = ctx->ops;
rreq->mapping = mapping;
rreq->inode = inode;
rreq->i_size = i_size_read(inode);
rreq->debug_id = atomic_inc_return(&debug_ids);
rreq->wsize = INT_MAX;
spin_lock_init(&rreq->lock);
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
INIT_LIST_HEAD(&rreq->subrequests);
INIT_WORK(&rreq->work, NULL);
refcount_set(&rreq->ref, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
if (file && file->f_flags & O_NONBLOCK)
__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
ret = rreq->netfs_ops->init_request(rreq, file);
if (ret < 0) {
mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool);
return ERR_PTR(ret);
}
}
atomic_inc(&ctx->io_count);
trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
return rreq;
}
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what)
{
int r;
__refcount_inc(&rreq->ref, &r);
trace_netfs_rreq_ref(rreq->debug_id, r + 1, what);
}
void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
int s;
while (!list_empty(&rreq->subrequests)) {
subreq = list_first_entry(&rreq->subrequests,
struct netfs_io_subrequest, rreq_link);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, was_async,
netfs_sreq_trace_put_clear);
}
for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) {
stream = &rreq->io_streams[s];
while (!list_empty(&stream->subrequests)) {
subreq = list_first_entry(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, was_async,
netfs_sreq_trace_put_clear);
}
}
}
static void netfs_free_request_rcu(struct rcu_head *rcu)
{
struct netfs_io_request *rreq = container_of(rcu, struct netfs_io_request, rcu);
mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool);
netfs_stat_d(&netfs_n_rh_rreq);
}
static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
netfs_proc_del_rreq(rreq);
netfs_clear_subrequests(rreq, false);
if (rreq->netfs_ops->free_request)
rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
if (rreq->direct_bv) {
for (i = 0; i < rreq->direct_bv_count; i++) {
if (rreq->direct_bv[i].bv_page) {
if (rreq->direct_bv_unpin)
unpin_user_page(rreq->direct_bv[i].bv_page);
}
}
kvfree(rreq->direct_bv);
}
if (atomic_dec_and_test(&ictx->io_count))
wake_up_var(&ictx->io_count);
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
enum netfs_rreq_ref_trace what)
{
unsigned int debug_id;
bool dead;
int r;
if (rreq) {
debug_id = rreq->debug_id;
dead = __refcount_dec_and_test(&rreq->ref, &r);
trace_netfs_rreq_ref(debug_id, r - 1, what);
if (dead) {
if (was_async) {
rreq->work.func = netfs_free_request;
if (!queue_work(system_unbound_wq, &rreq->work))
BUG();
} else {
netfs_free_request(&rreq->work);
}
}
}
}
/*
* Allocate and partially initialise an I/O request structure.
*/
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
mempool_t *mempool = rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool;
struct kmem_cache *cache = mempool->pool_data;
for (;;) {
subreq = mempool_alloc(rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool,
GFP_KERNEL);
if (subreq)
break;
msleep(10);
}
memset(subreq, 0, kmem_cache_size(cache));
INIT_WORK(&subreq->work, NULL);
INIT_LIST_HEAD(&subreq->rreq_link);
refcount_set(&subreq->ref, 2);
subreq->rreq = rreq;
subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
netfs_get_request(rreq, netfs_rreq_trace_get_subreq);
netfs_stat(&netfs_n_rh_sreq);
return subreq;
}
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what)
{
int r;
__refcount_inc(&subreq->ref, &r);
trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index, r + 1,
what);
}
static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
bool was_async)
{
struct netfs_io_request *rreq = subreq->rreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_free);
if (rreq->netfs_ops->free_subrequest)
rreq->netfs_ops->free_subrequest(subreq);
mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool);
netfs_stat_d(&netfs_n_rh_sreq);
netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
}
void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
enum netfs_sreq_ref_trace what)
{
unsigned int debug_index = subreq->debug_index;
unsigned int debug_id = subreq->rreq->debug_id;
bool dead;
int r;
dead = __refcount_dec_and_test(&subreq->ref, &r);
trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what);
if (dead)
netfs_free_subrequest(subreq, was_async);
}