linux/fs/erofs/fscache.c

613 lines
15 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022, Alibaba Cloud
* Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/
#include <linux/fscache.h>
#include "internal.h"
/* Taken around lookups/updates of erofs_domain_list and erofs_pseudo_mnt. */
static DEFINE_MUTEX(erofs_domain_list_lock);
/* Taken around lookups/insertions on erofs_domain_cookies_list. */
static DEFINE_MUTEX(erofs_domain_cookies_lock);
/* Every registered shared domain (struct erofs_domain, linked via ->list). */
static LIST_HEAD(erofs_domain_list);
/* Every cookie registered in a shared domain (struct erofs_fscache, via ->node). */
static LIST_HEAD(erofs_domain_cookies_list);
/*
 * Pseudo mount whose superblock hosts the anonymous inodes of shareable
 * blobs; created with kern_mount() when a domain is initialised and
 * unmounted once the domain list becomes empty.
 */
static struct vfsmount *erofs_pseudo_mnt;
/*
 * State of one read issued against fscache.
 *
 * A request whose @primary is NULL is the primary request: it describes the
 * whole page-cache range and completes the folios when its last reference is
 * dropped.  Chained requests point at their primary through @primary and
 * hold a reference on it for as long as they live.
 */
struct erofs_fscache_request {
	/* Owning primary request, or NULL if this is the primary itself */
	struct erofs_fscache_request *primary;
	/* Cache operation state; ended on the final put if it was begun */
	struct netfs_cache_resources cache_resources;
	/* The mapping being accessed */
	struct address_space *mapping;
	/* Start position of the request in @mapping */
	loff_t start;
	/* Length of the request in bytes */
	size_t len;
	/* Number of bytes already submitted */
	size_t submitted;
	/* 0 or the error that occurred */
	short error;
	/* Reference count; the request is freed when it drops to zero */
	refcount_t ref;
};
/*
 * Allocate a zero-initialised request describing @len bytes of @mapping
 * starting at @start.
 *
 * Returns the new request holding a single reference (owned by the caller),
 * or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *new_req;

	new_req = kzalloc(sizeof(*new_req), GFP_KERNEL);
	if (new_req) {
		new_req->mapping = mapping;
		new_req->start = start;
		new_req->len = len;
		refcount_set(&new_req->ref, 1);
		return new_req;
	}
	return ERR_PTR(-ENOMEM);
}
/*
 * Get the request to use for the next submission of @len bytes.
 *
 * The first submission reuses @primary itself; later submissions get a
 * freshly allocated request starting right after the already-submitted part
 * and linked back to @primary.  In both successful cases an extra reference
 * is taken on @primary.
 *
 * Returns the request to submit with, or an ERR_PTR() if allocating a
 * chained request failed (no reference is taken in that case).
 */
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *sub;

	if (primary->submitted) {
		sub = erofs_fscache_req_alloc(primary->mapping,
				primary->start + primary->submitted, len);
		if (IS_ERR(sub))
			return sub;
		sub->primary = primary;
	} else {
		/* nothing submitted yet: the primary carries the first read */
		sub = primary;
	}
	refcount_inc(&primary->ref);
	return sub;
}
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
struct folio *folio;
bool failed = req->error;
pgoff_t start_page = req->start / PAGE_SIZE;
pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
XA_STATE(xas, &req->mapping->i_pages, start_page);
rcu_read_lock();
xas_for_each(&xas, folio, last_page) {
if (xas_retry(&xas, folio))
continue;
if (!failed)
folio_mark_uptodate(folio);
folio_unlock(folio);
}
rcu_read_unlock();
}
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
if (refcount_dec_and_test(&req->ref)) {
if (req->cache_resources.ops)
req->cache_resources.ops->end_operation(&req->cache_resources);
if (!req->primary)
erofs_fscache_req_complete(req);
else
erofs_fscache_req_put(req->primary);
kfree(req);
}
}
/*
 * Completion callback handed to fscache_read().  @priv is the request the
 * read was issued for.  A failure is recorded on the primary request (or on
 * @priv itself when it is the primary), then the reference taken for this
 * read is dropped.
 */
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;
	struct erofs_fscache_request *owner;

	if (req->primary)
		owner = req->primary;
	else
		owner = req;

	if (IS_ERR_VALUE(transferred_or_error))
		owner->error = transferred_or_error;

	erofs_fscache_req_put(req);
}
/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 *
 * The range is submitted in as many slices as prepare_ondemand_read()
 * dictates.  Each slice takes its own reference on @req, which
 * erofs_fscache_subreq_complete() drops when it is invoked for that slice.
 *
 * Returns 0 once every slice has been submitted, the error from
 * fscache_begin_read_operation() or fscache_read(), or -EIO if the cache
 * cannot serve a slice.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		/* may shrink slen to what can be read in one go */
		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		/* a zero-length slice would never make progress */
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		/* reference owned by the in-flight read of this slice */
		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		/* queued asynchronously: not an error for the submitter */
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}
Page cache changes for 5.19 - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEEejHryeLBw/spnjHrDpNsjXcpgj4FAmKNMDUACgkQDpNsjXcp gj4/mwf/bpHhXH4ZoNIvtUpTF6rZbqeffmc0VrbxCZDZ6igRnRPglxZ9H9v6L53O 7B0FBQIfxgNKHZpdqGdOkv8cjg/GMe/HJUbEy5wOakYPo4L9fZpHbDZ9HM2Eankj xBqLIBgBJ7doKr+Y62DAN19TVD8jfRfVtli5mqXJoNKf65J7BkxljoTH1L3EXD9d nhLAgyQjR67JQrT/39KMW+17GqLhGefLQ4YnAMONtB6TVwX/lZmigKpzVaCi4r26 bnk5vaR/3PdjtNxIoYvxdc71y2Eg05n2jEq9Wcy1AaDv/5vbyZUlZ2aBSaIVbtKX WfrhN9O3L0bU5qS7p9PoyfLc9wpq8A== =djLv -----END PGP SIGNATURE----- Merge tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache Pull page cache updates from Matthew Wilcox: - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio * tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache: (107 commits) nilfs2: Fix some kernel-doc comments Appoint myself page cache maintainer fs: Remove aops->freepage secretmem: Convert to free_folio nfs: Convert to free_folio orangefs: Convert to free_folio fs: Add free_folio address space operation fs: Convert 
drop_buffers() to use a folio fs: Change try_to_free_buffers() to take a folio jbd2: Convert release_buffer_page() to use a folio jbd2: Convert jbd2_journal_try_to_free_buffers to take a folio reiserfs: Convert release_buffer_page() to use a folio fs: Remove last vestiges of releasepage ubifs: Convert to release_folio reiserfs: Convert to release_folio orangefs: Convert to release_folio ocfs2: Convert to release_folio nilfs2: Remove comment about releasepage nfs: Convert to release_folio jfs: Convert to release_folio ...
2022-05-25 10:55:07 +08:00
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
int ret;
struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
struct erofs_fscache_request *req;
req = erofs_fscache_req_alloc(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) {
folio_unlock(folio);
return PTR_ERR(req);
}
ret = erofs_fscache_read_folios_async(ctx->cookie, req,
folio_pos(folio), folio_size(folio));
if (ret)
req->error = ret;
erofs_fscache_req_put(req);
return ret;
}
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
struct address_space *mapping = primary->mapping;
struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
struct erofs_fscache_request *req;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
struct iov_iter iter;
loff_t pos = primary->start + primary->submitted;
size_t count;
int ret;
map.m_la = pos;
ret = erofs_map_blocks(inode, &map);
if (ret)
return ret;
if (map.m_flags & EROFS_MAP_META) {
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
erofs_blk_t blknr;
size_t offset, size;
void *src;
/* For tail packing layout, the offset may be non-zero. */
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 21:53:08 +08:00
offset = erofs_blkoff(sb, map.m_pa);
blknr = erofs_blknr(sb, map.m_pa);
size = map.m_llen;
src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
if (IS_ERR(src))
return PTR_ERR(src);
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
if (copy_to_iter(src + offset, size, &iter) != size) {
erofs_put_metabuf(&buf);
return -EFAULT;
}
iov_iter_zero(PAGE_SIZE - size, &iter);
erofs_put_metabuf(&buf);
primary->submitted += PAGE_SIZE;
return 0;
}
count = primary->len - primary->submitted;
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
iov_iter_zero(count, &iter);
primary->submitted += count;
return 0;
}
count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
DBG_BUGON(!count || count % PAGE_SIZE);
mdev = (struct erofs_map_dev) {
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
ret = erofs_map_dev(sb, &mdev);
if (ret)
return ret;
req = erofs_fscache_req_chain(primary, count);
if (IS_ERR(req))
return PTR_ERR(req);
ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
req, mdev.m_pa + (pos - map.m_la), count);
erofs_fscache_req_put(req);
primary->submitted += count;
return ret;
}
/*
 * Submit @req slice by slice until the whole length has been submitted or a
 * slice fails.  A failure is recorded in req->error and returned; 0 is
 * returned when everything was submitted.
 */
static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	for (;;) {
		ret = erofs_fscache_data_read_slice(req);
		if (ret) {
			req->error = ret;
			break;
		}
		if (req->submitted >= req->len)
			break;
	}
	return ret;
}
/*
 * ->read_folio for regular data: build a request spanning @folio and submit
 * it.  The folio is unlocked here only when the request cannot be allocated;
 * otherwise that is left to the request completion.  Returns 0 or a negative
 * errno.
 */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				      folio_pos(folio), folio_size(folio));
	if (!IS_ERR(req)) {
		ret = erofs_fscache_data_read(req);
		erofs_fscache_req_put(req);
		return ret;
	}

	folio_unlock(folio);
	return PTR_ERR(req);
}
static void erofs_fscache_readahead(struct readahead_control *rac)
{
struct erofs_fscache_request *req;
if (!readahead_count(rac))
return;
req = erofs_fscache_req_alloc(rac->mapping,
readahead_pos(rac), readahead_length(rac));
if (IS_ERR(req))
return;
/* The request completion will drop refs on the folios. */
while (readahead_folio(rac))
;
erofs_fscache_data_read(req);
erofs_fscache_req_put(req);
}
static const struct address_space_operations erofs_fscache_meta_aops = {
Page cache changes for 5.19 - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEEejHryeLBw/spnjHrDpNsjXcpgj4FAmKNMDUACgkQDpNsjXcp gj4/mwf/bpHhXH4ZoNIvtUpTF6rZbqeffmc0VrbxCZDZ6igRnRPglxZ9H9v6L53O 7B0FBQIfxgNKHZpdqGdOkv8cjg/GMe/HJUbEy5wOakYPo4L9fZpHbDZ9HM2Eankj xBqLIBgBJ7doKr+Y62DAN19TVD8jfRfVtli5mqXJoNKf65J7BkxljoTH1L3EXD9d nhLAgyQjR67JQrT/39KMW+17GqLhGefLQ4YnAMONtB6TVwX/lZmigKpzVaCi4r26 bnk5vaR/3PdjtNxIoYvxdc71y2Eg05n2jEq9Wcy1AaDv/5vbyZUlZ2aBSaIVbtKX WfrhN9O3L0bU5qS7p9PoyfLc9wpq8A== =djLv -----END PGP SIGNATURE----- Merge tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache Pull page cache updates from Matthew Wilcox: - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio * tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache: (107 commits) nilfs2: Fix some kernel-doc comments Appoint myself page cache maintainer fs: Remove aops->freepage secretmem: Convert to free_folio nfs: Convert to free_folio orangefs: Convert to free_folio fs: Add free_folio address space operation fs: Convert 
drop_buffers() to use a folio fs: Change try_to_free_buffers() to take a folio jbd2: Convert release_buffer_page() to use a folio jbd2: Convert jbd2_journal_try_to_free_buffers to take a folio reiserfs: Convert release_buffer_page() to use a folio fs: Remove last vestiges of releasepage ubifs: Convert to release_folio reiserfs: Convert to release_folio orangefs: Convert to release_folio ocfs2: Convert to release_folio nilfs2: Remove comment about releasepage nfs: Convert to release_folio jfs: Convert to release_folio ...
2022-05-25 10:55:07 +08:00
.read_folio = erofs_fscache_meta_read_folio,
};
const struct address_space_operations erofs_fscache_access_aops = {
Page cache changes for 5.19 - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEEejHryeLBw/spnjHrDpNsjXcpgj4FAmKNMDUACgkQDpNsjXcp gj4/mwf/bpHhXH4ZoNIvtUpTF6rZbqeffmc0VrbxCZDZ6igRnRPglxZ9H9v6L53O 7B0FBQIfxgNKHZpdqGdOkv8cjg/GMe/HJUbEy5wOakYPo4L9fZpHbDZ9HM2Eankj xBqLIBgBJ7doKr+Y62DAN19TVD8jfRfVtli5mqXJoNKf65J7BkxljoTH1L3EXD9d nhLAgyQjR67JQrT/39KMW+17GqLhGefLQ4YnAMONtB6TVwX/lZmigKpzVaCi4r26 bnk5vaR/3PdjtNxIoYvxdc71y2Eg05n2jEq9Wcy1AaDv/5vbyZUlZ2aBSaIVbtKX WfrhN9O3L0bU5qS7p9PoyfLc9wpq8A== =djLv -----END PGP SIGNATURE----- Merge tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache Pull page cache updates from Matthew Wilcox: - Appoint myself page cache maintainer - Fix how scsicam uses the page cache - Use the memalloc_nofs_save() API to replace AOP_FLAG_NOFS - Remove the AOP flags entirely - Remove pagecache_write_begin() and pagecache_write_end() - Documentation updates - Convert several address_space operations to use folios: - is_dirty_writeback - readpage becomes read_folio - releasepage becomes release_folio - freepage becomes free_folio - Change filler_t to require a struct file pointer be the first argument like ->read_folio * tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache: (107 commits) nilfs2: Fix some kernel-doc comments Appoint myself page cache maintainer fs: Remove aops->freepage secretmem: Convert to free_folio nfs: Convert to free_folio orangefs: Convert to free_folio fs: Add free_folio address space operation fs: Convert 
drop_buffers() to use a folio fs: Change try_to_free_buffers() to take a folio jbd2: Convert release_buffer_page() to use a folio jbd2: Convert jbd2_journal_try_to_free_buffers to take a folio reiserfs: Convert release_buffer_page() to use a folio fs: Remove last vestiges of releasepage ubifs: Convert to release_folio reiserfs: Convert to release_folio orangefs: Convert to release_folio ocfs2: Convert to release_folio nilfs2: Remove comment about releasepage nfs: Convert to release_folio jfs: Convert to release_folio ...
2022-05-25 10:55:07 +08:00
.read_folio = erofs_fscache_read_folio,
.readahead = erofs_fscache_readahead,
};
/*
 * Drop one reference on @domain.  On the final put the domain is unlinked
 * from erofs_domain_list, the pseudo mount is torn down if no domain is
 * left, the volume is relinquished and the domain is freed.
 */
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (!refcount_dec_and_test(&domain->ref)) {
		mutex_unlock(&erofs_domain_list_lock);
		return;
	}

	list_del(&domain->list);
	/* last domain gone: the pseudo mount is no longer needed */
	if (list_empty(&erofs_domain_list)) {
		kern_unmount(erofs_pseudo_mnt);
		erofs_pseudo_mnt = NULL;
	}
	fscache_relinquish_volume(domain->volume, NULL, false);
	mutex_unlock(&erofs_domain_list_lock);

	kfree(domain->domain_id);
	kfree(domain);
}
/*
 * Acquire the fscache volume for @sb.  The volume key is "erofs,<id>" where
 * <id> is the domain id when one is configured and the fsid otherwise.
 *
 * sbi->volume is set to the acquired volume, or to NULL on failure.
 * Returns 0, -ENOMEM if the key cannot be built, the error encoded in the
 * returned volume pointer, or -EOPNOTSUPP when no volume is returned.
 */
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *vol;
	char *key;
	int ret = 0;

	key = kasprintf(GFP_KERNEL, "erofs,%s",
			domain_id ? domain_id : sbi->fsid);
	if (!key)
		return -ENOMEM;

	vol = fscache_acquire_volume(key, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(vol)) {
		erofs_err(sb, "failed to register volume for %s", key);
		if (vol)
			ret = PTR_ERR(vol);
		else
			ret = -EOPNOTSUPP;
		vol = NULL;
	}

	sbi->volume = vol;
	kfree(key);
	return ret;
}
/*
 * Create a new shared domain for @sb: duplicate the domain id, register the
 * fscache volume, make sure the global pseudo mount exists, and link the
 * domain (holding one reference) into erofs_domain_list.
 *
 * Called from erofs_fscache_register_domain() with erofs_domain_list_lock
 * held.  Returns 0 on success or a negative errno.
 */
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		/*
		 * Mount into a local first: storing an ERR_PTR() in the
		 * global would make later callers skip this branch and use
		 * the error pointer as if it were a valid mount.
		 */
		struct vfsmount *mnt = kern_mount(&erofs_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			/*
			 * NOTE(review): sbi->volume stays registered on this
			 * path; presumably the caller's teardown relinquishes
			 * it - confirm.
			 */
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}
/*
 * Attach @sb to the shared domain named by sbi->domain_id.  An existing
 * domain with the same id is reused (taking a reference and sharing its
 * volume); otherwise a new one is created.  Returns 0 or a negative errno.
 */
static int erofs_fscache_register_domain(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_domain *cur;
	int err;

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(cur, &erofs_domain_list, list) {
		if (strcmp(cur->domain_id, sbi->domain_id) != 0)
			continue;

		sbi->domain = cur;
		sbi->volume = cur->volume;
		refcount_inc(&cur->ref);
		err = 0;
		goto unlock;
	}
	/* no such domain yet: create it while still holding the lock */
	err = erofs_fscache_init_domain(sb);
unlock:
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
/*
 * Acquire and start using the fscache cookie called @name in the volume of
 * @sb, and pair it with a new anonymous inode whose mapping is served by
 * erofs_fscache_meta_aops.
 *
 * With EROFS_REG_COOKIE_SHARE set in @flags the inode is allocated from the
 * global pseudo mount, otherwise from @sb itself.
 *
 * Returns the new context holding one reference, or ERR_PTR(-ENOMEM) /
 * ERR_PTR(-EINVAL) on failure.
 */
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	/* the block size is taken from @sb even when the inode lives in isb */
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}
/*
 * Tear down @ctx: stop using and relinquish its cookie, drop the anonymous
 * inode, then free the name and the context itself.
 */
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	struct fscache_cookie *cookie = ctx->cookie;

	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);

	iput(ctx->inode);

	kfree(ctx->name);
	kfree(ctx);
}
static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
char *name, unsigned int flags)
{
struct erofs_fscache *ctx;
struct erofs_domain *domain = EROFS_SB(sb)->domain;
erofs: check the uniqueness of fsid in shared domain in advance When shared domain is enabled, doing mount twice with the same fsid and domain_id will trigger sysfs warning as shown below: sysfs: cannot create duplicate filename '/fs/erofs/d0,meta.bin' CPU: 15 PID: 1051 Comm: mount Not tainted 6.1.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: <TASK> dump_stack_lvl+0x38/0x49 dump_stack+0x10/0x12 sysfs_warn_dup.cold+0x17/0x27 sysfs_create_dir_ns+0xb8/0xd0 kobject_add_internal+0xb1/0x240 kobject_init_and_add+0x71/0xa0 erofs_register_sysfs+0x89/0x110 erofs_fc_fill_super+0x98c/0xaf0 vfs_get_super+0x7d/0x100 get_tree_nodev+0x16/0x20 erofs_fc_get_tree+0x20/0x30 vfs_get_tree+0x24/0xb0 path_mount+0x2fa/0xa90 do_mount+0x7c/0xa0 __x64_sys_mount+0x8b/0xe0 do_syscall_64+0x30/0x60 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The reason is erofs_fscache_register_cookie() doesn't guarantee the primary data blob (aka fsid) is unique in the shared domain and erofs_register_sysfs() invoked by the second mount will fail due to the duplicated fsid in the shared domain and report warning. It would be better to check the uniqueness of fsid before doing erofs_register_sysfs(), so adding a new flags parameter for erofs_fscache_register_cookie() and doing the uniqueness check if EROFS_REG_COOKIE_NEED_NOEXIST is enabled. After the patch, the error in dmesg for the duplicated mount would be: erofs: ...: erofs_domain_register_cookie: XX already exists in domain YY Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com> Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20221125110822.3812942-1-houtao@huaweicloud.com Fixes: 7d41963759fe ("erofs: Support sharing cookies in the same domain") Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2022-11-25 19:08:22 +08:00
ctx = erofs_fscache_acquire_cookie(sb, name, flags);
if (IS_ERR(ctx))
return ctx;
ctx->name = kstrdup(name, GFP_KERNEL);
if (!ctx->name) {
erofs_fscache_relinquish_cookie(ctx);
return ERR_PTR(-ENOMEM);
}
refcount_inc(&domain->ref);
ctx->domain = domain;
list_add(&ctx->node, &erofs_domain_cookies_list);
return ctx;
}
/*
 * Look up or create the cookie context called @name in the shared domain of
 * @sb.  EROFS_REG_COOKIE_SHARE is always added to @flags.
 *
 * If a context with the same name already exists in the domain it is reused
 * with an extra reference, unless EROFS_REG_COOKIE_NEED_NOEXIST is set, in
 * which case ERR_PTR(-EEXIST) is returned.  Otherwise a new context is
 * created.  Returns the context or an ERR_PTR().
 */
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	/* not registered yet: create it while still holding the lock */
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
erofs: check the uniqueness of fsid in shared domain in advance When shared domain is enabled, doing mount twice with the same fsid and domain_id will trigger sysfs warning as shown below: sysfs: cannot create duplicate filename '/fs/erofs/d0,meta.bin' CPU: 15 PID: 1051 Comm: mount Not tainted 6.1.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: <TASK> dump_stack_lvl+0x38/0x49 dump_stack+0x10/0x12 sysfs_warn_dup.cold+0x17/0x27 sysfs_create_dir_ns+0xb8/0xd0 kobject_add_internal+0xb1/0x240 kobject_init_and_add+0x71/0xa0 erofs_register_sysfs+0x89/0x110 erofs_fc_fill_super+0x98c/0xaf0 vfs_get_super+0x7d/0x100 get_tree_nodev+0x16/0x20 erofs_fc_get_tree+0x20/0x30 vfs_get_tree+0x24/0xb0 path_mount+0x2fa/0xa90 do_mount+0x7c/0xa0 __x64_sys_mount+0x8b/0xe0 do_syscall_64+0x30/0x60 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The reason is erofs_fscache_register_cookie() doesn't guarantee the primary data blob (aka fsid) is unique in the shared domain and erofs_register_sysfs() invoked by the second mount will fail due to the duplicated fsid in the shared domain and report warning. It would be better to check the uniqueness of fsid before doing erofs_register_sysfs(), so adding a new flags parameter for erofs_fscache_register_cookie() and doing the uniqueness check if EROFS_REG_COOKIE_NEED_NOEXIST is enabled. After the patch, the error in dmesg for the duplicated mount would be: erofs: ...: erofs_domain_register_cookie: XX already exists in domain YY Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com> Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20221125110822.3812942-1-houtao@huaweicloud.com Fixes: 7d41963759fe ("erofs: Support sharing cookies in the same domain") Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2022-11-25 19:08:22 +08:00
char *name,
unsigned int flags)
{
if (EROFS_SB(sb)->domain_id)
erofs: check the uniqueness of fsid in shared domain in advance When shared domain is enabled, doing mount twice with the same fsid and domain_id will trigger sysfs warning as shown below: sysfs: cannot create duplicate filename '/fs/erofs/d0,meta.bin' CPU: 15 PID: 1051 Comm: mount Not tainted 6.1.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: <TASK> dump_stack_lvl+0x38/0x49 dump_stack+0x10/0x12 sysfs_warn_dup.cold+0x17/0x27 sysfs_create_dir_ns+0xb8/0xd0 kobject_add_internal+0xb1/0x240 kobject_init_and_add+0x71/0xa0 erofs_register_sysfs+0x89/0x110 erofs_fc_fill_super+0x98c/0xaf0 vfs_get_super+0x7d/0x100 get_tree_nodev+0x16/0x20 erofs_fc_get_tree+0x20/0x30 vfs_get_tree+0x24/0xb0 path_mount+0x2fa/0xa90 do_mount+0x7c/0xa0 __x64_sys_mount+0x8b/0xe0 do_syscall_64+0x30/0x60 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The reason is erofs_fscache_register_cookie() doesn't guarantee the primary data blob (aka fsid) is unique in the shared domain and erofs_register_sysfs() invoked by the second mount will fail due to the duplicated fsid in the shared domain and report warning. It would be better to check the uniqueness of fsid before doing erofs_register_sysfs(), so adding a new flags parameter for erofs_fscache_register_cookie() and doing the uniqueness check if EROFS_REG_COOKIE_NEED_NOEXIST is enabled. After the patch, the error in dmesg for the duplicated mount would be: erofs: ...: erofs_domain_register_cookie: XX already exists in domain YY Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com> Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20221125110822.3812942-1-houtao@huaweicloud.com Fixes: 7d41963759fe ("erofs: Support sharing cookies in the same domain") Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2022-11-25 19:08:22 +08:00
return erofs_domain_register_cookie(sb, name, flags);
return erofs_fscache_acquire_cookie(sb, name, flags);
}
/*
 * Release the caller's hold on @ctx. A NULL @ctx is accepted and ignored.
 *
 * A context without a domain is relinquished immediately. A context that
 * belongs to a domain is reference counted: only when the count drops to
 * zero is it unlinked from its list and relinquished, and the domain it
 * pointed at is put afterwards.
 */
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *to_put = NULL;

	if (!ctx)
		return;

	if (!ctx->domain) {
		erofs_fscache_relinquish_cookie(ctx);
		return;
	}

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		/*
		 * Remember the domain first: ctx is not touched again once
		 * it has been relinquished.
		 */
		to_put = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);

	/* The domain reference is dropped only after the lock is released. */
	if (!to_put)
		return;
	erofs_fscache_domain_put(to_put);
}
int erofs_fscache_register_fs(struct super_block *sb)
{
int ret;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
unsigned int flags = 0;
if (sbi->domain_id)
ret = erofs_fscache_register_domain(sb);
else
ret = erofs_fscache_register_volume(sb);
if (ret)
return ret;
erofs: check the uniqueness of fsid in shared domain in advance When shared domain is enabled, doing mount twice with the same fsid and domain_id will trigger sysfs warning as shown below: sysfs: cannot create duplicate filename '/fs/erofs/d0,meta.bin' CPU: 15 PID: 1051 Comm: mount Not tainted 6.1.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: <TASK> dump_stack_lvl+0x38/0x49 dump_stack+0x10/0x12 sysfs_warn_dup.cold+0x17/0x27 sysfs_create_dir_ns+0xb8/0xd0 kobject_add_internal+0xb1/0x240 kobject_init_and_add+0x71/0xa0 erofs_register_sysfs+0x89/0x110 erofs_fc_fill_super+0x98c/0xaf0 vfs_get_super+0x7d/0x100 get_tree_nodev+0x16/0x20 erofs_fc_get_tree+0x20/0x30 vfs_get_tree+0x24/0xb0 path_mount+0x2fa/0xa90 do_mount+0x7c/0xa0 __x64_sys_mount+0x8b/0xe0 do_syscall_64+0x30/0x60 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The reason is erofs_fscache_register_cookie() doesn't guarantee the primary data blob (aka fsid) is unique in the shared domain and erofs_register_sysfs() invoked by the second mount will fail due to the duplicated fsid in the shared domain and report warning. It would be better to check the uniqueness of fsid before doing erofs_register_sysfs(), so adding a new flags parameter for erofs_fscache_register_cookie() and doing the uniqueness check if EROFS_REG_COOKIE_NEED_NOEXIST is enabled. After the patch, the error in dmesg for the duplicated mount would be: erofs: ...: erofs_domain_register_cookie: XX already exists in domain YY Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com> Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20221125110822.3812942-1-houtao@huaweicloud.com Fixes: 7d41963759fe ("erofs: Support sharing cookies in the same domain") Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2022-11-25 19:08:22 +08:00
/*
* When shared domain is enabled, using NEED_NOEXIST to guarantee
* the primary data blob (aka fsid) is unique in the shared domain.
*
* For non-shared-domain case, fscache_acquire_volume() invoked by
* erofs_fscache_register_volume() has already guaranteed
* the uniqueness of primary data blob.
*
* Acquired domain/volume will be relinquished in kill_sb() on error.
*/
if (sbi->domain_id)
flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
if (IS_ERR(fscache))
return PTR_ERR(fscache);
sbi->s_fscache = fscache;
return 0;
}
/*
 * Tear down the fscache state attached to @sb.
 *
 * Unregisters the primary cookie, then drops either the domain reference
 * (if one is set) or the volume, and finally clears the corresponding
 * pointers in the superblock info.
 */
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	/* The cookie goes first, before its domain/volume is released. */
	erofs_fscache_unregister_cookie(sbi->s_fscache);
	sbi->s_fscache = NULL;

	if (!sbi->domain)
		fscache_relinquish_volume(sbi->volume, NULL, false);
	else
		erofs_fscache_domain_put(sbi->domain);

	sbi->volume = NULL;
	sbi->domain = NULL;
}