erofs: tidy up struct z_erofs_bvec

After revisiting the design, I believe `struct z_erofs_bvec` should
be page-based instead of folio-based, for the reasons below:

 - The minimum memory-mapping unit is a page;

 - Under certain circumstances, only temporary pages need to be used
   instead of folios, since refcounts and mapcounts for such pages are
   unnecessary;

 - Decompressors handle all types of pages including temporary pages,
   not only folios.

When handling `struct z_erofs_bvec`, all folio-related information
is now accessed using the page_folio() helper.
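
To illustrate, here is a minimal sketch of the new layout;
`z_erofs_bvec_folio()` is a hypothetical helper named for illustration
only and is not part of this patch:

/* Minimal sketch of the page-based bvec (matches this patch's diff) */
struct z_erofs_bvec {
	struct page *page;	/* may point to a temporary (shortlived) page */
	int offset;
	unsigned int end;
};

/* Hypothetical helper, for illustration only: derive folio metadata
 * on demand rather than caching a folio pointer in the bvec itself. */
static inline struct folio *z_erofs_bvec_folio(const struct z_erofs_bvec *bvec)
{
	return bvec->page ? page_folio(bvec->page) : NULL;
}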

The final goal of this round of adaptation is to eliminate direct
accesses to `struct page` in the EROFS codebase, except for a few
helpers like `z_erofs_is_shortlived_page()` and
`z_erofs_page_is_invalidated()`, which require a new helper to
determine the memdesc type of an arbitrary page.
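
As a concrete example, the adapted invalidation check below (taken
from this patch's diff) now derives the mapping via page_folio()
instead of touching `page->mapping` directly:

static bool z_erofs_page_is_invalidated(struct page *page)
{
	/* a NULL mapping means truncation, unless it is a shortlived page */
	return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
}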

Large folios of compressed files actually seem to work now, but I
would like to run more tests before officially enabling them for all
scenarios.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-4-hsiangkao@linux.alibaba.com
commit 2080ca1ed3 (parent 5b9654efb6)
Author: Gao Xiang, 2024-07-03 20:00:51 +08:00


@@ -19,10 +19,7 @@
 typedef void *z_erofs_next_pcluster_t;
 
 struct z_erofs_bvec {
-	union {
-		struct page *page;
-		struct folio *folio;
-	};
+	struct page *page;
 	int offset;
 	unsigned int end;
 };
@@ -617,32 +614,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
 	fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
 }
 
-/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+/* (erofs_shrinker) disconnect cached encoded data with pclusters */
 int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *grp)
 {
 	struct z_erofs_pcluster *const pcl =
 		container_of(grp, struct z_erofs_pcluster, obj);
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct folio *folio;
 	int i;
 
 	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-	/* There is no actice user since the pcluster is now freezed */
+	/* Each cached folio contains one page unless bs > ps is supported */
 	for (i = 0; i < pclusterpages; ++i) {
-		struct folio *folio = pcl->compressed_bvecs[i].folio;
-
-		if (!folio)
-			continue;
-
-		/* Avoid reclaiming or migrating this folio */
-		if (!folio_trylock(folio))
-			return -EBUSY;
-
-		if (!erofs_folio_is_managed(sbi, folio))
-			continue;
-		pcl->compressed_bvecs[i].folio = NULL;
-		folio_detach_private(folio);
-		folio_unlock(folio);
+		if (pcl->compressed_bvecs[i].page) {
+			folio = page_folio(pcl->compressed_bvecs[i].page);
+			/* Avoid reclaiming or migrating this folio */
+			if (!folio_trylock(folio))
+				return -EBUSY;
+
+			if (!erofs_folio_is_managed(sbi, folio))
+				continue;
+			pcl->compressed_bvecs[i].page = NULL;
+			folio_detach_private(folio);
+			folio_unlock(folio);
+		}
 	}
 	return 0;
 }
@@ -650,9 +646,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 {
 	struct z_erofs_pcluster *pcl = folio_get_private(folio);
-	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct z_erofs_bvec *bvec = pcl->compressed_bvecs;
+	struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl);
 	bool ret;
-	int i;
 
 	if (!folio_test_private(folio))
 		return true;
@@ -661,9 +657,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	spin_lock(&pcl->obj.lockref.lock);
 	if (pcl->obj.lockref.count <= 0) {
 		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-		for (i = 0; i < pclusterpages; ++i) {
-			if (pcl->compressed_bvecs[i].folio == folio) {
-				pcl->compressed_bvecs[i].folio = NULL;
+		for (; bvec < end; ++bvec) {
+			if (bvec->page && page_folio(bvec->page) == folio) {
+				bvec->page = NULL;
 				folio_detach_private(folio);
 				ret = true;
 				break;
@@ -1062,7 +1058,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
 
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
-	return !page->mapping && !z_erofs_is_shortlived_page(page);
+	return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
 }
 
 struct z_erofs_decompress_backend {
@@ -1415,7 +1411,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	bool tocache = false;
 	struct z_erofs_bvec zbv;
 	struct address_space *mapping;
-	struct page *page;
+	struct folio *folio;
 	int bs = i_blocksize(f->inode);
 
 	/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1425,23 +1421,25 @@ repeat:
 	spin_lock(&pcl->obj.lockref.lock);
 	zbv = pcl->compressed_bvecs[nr];
 	spin_unlock(&pcl->obj.lockref.lock);
-	if (!zbv.folio)
+	if (!zbv.page)
 		goto out_allocfolio;
 
-	bvec->bv_page = &zbv.folio->page;
+	bvec->bv_page = zbv.page;
 	DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
+
+	folio = page_folio(zbv.page);
 	/*
 	 * Handle preallocated cached folios. We tried to allocate such folios
 	 * without triggering direct reclaim. If allocation failed, inplace
 	 * file-backed folios will be used instead.
 	 */
-	if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-		zbv.folio->private = 0;
+	if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+		folio->private = 0;
 		tocache = true;
 		goto out_tocache;
 	}
-	mapping = READ_ONCE(zbv.folio->mapping);
+	mapping = READ_ONCE(folio->mapping);
 	/*
 	 * File-backed folios for inplace I/Os are all locked steady,
 	 * therefore it is impossible for `mapping` to be NULL.
 	 */
@@ -1453,21 +1451,21 @@ repeat:
 		return;
 	}
 
-	folio_lock(zbv.folio);
-	if (zbv.folio->mapping == mc) {
+	folio_lock(folio);
+	if (folio->mapping == mc) {
 		/*
 		 * The cached folio is still in managed cache but without
 		 * a valid `->private` pcluster hint. Let's reconnect them.
 		 */
-		if (!folio_test_private(zbv.folio)) {
-			folio_attach_private(zbv.folio, pcl);
+		if (!folio_test_private(folio)) {
+			folio_attach_private(folio, pcl);
 			/* compressed_bvecs[] already takes a ref before */
-			folio_put(zbv.folio);
+			folio_put(folio);
 		}
 
 		/* no need to submit if it is already up-to-date */
-		if (folio_test_uptodate(zbv.folio)) {
-			folio_unlock(zbv.folio);
+		if (folio_test_uptodate(folio)) {
+			folio_unlock(folio);
 			bvec->bv_page = NULL;
 		}
 		return;
@@ -1477,32 +1475,31 @@ repeat:
 	 * It has been truncated, so it's unsafe to reuse this one. Let's
 	 * allocate a new page for compressed data.
 	 */
-	DBG_BUGON(zbv.folio->mapping);
+	DBG_BUGON(folio->mapping);
 	tocache = true;
-	folio_unlock(zbv.folio);
-	folio_put(zbv.folio);
+	folio_unlock(folio);
+	folio_put(folio);
 out_allocfolio:
-	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
 	spin_lock(&pcl->obj.lockref.lock);
-	if (pcl->compressed_bvecs[nr].folio) {
-		erofs_pagepool_add(&f->pagepool, page);
+	if (pcl->compressed_bvecs[nr].page) {
+		erofs_pagepool_add(&f->pagepool, zbv.page);
 		spin_unlock(&pcl->obj.lockref.lock);
 		cond_resched();
 		goto repeat;
 	}
-	pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+	bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
+	folio = page_folio(zbv.page);
+	/* first mark it as a temporary shortlived folio (now 1 ref) */
+	folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 	spin_unlock(&pcl->obj.lockref.lock);
-	bvec->bv_page = page;
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
-		/* turn into a temporary shortlived folio (1 ref) */
-		zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
 		return;
-	}
-	folio_attach_private(zbv.folio, pcl);
+	folio_attach_private(folio, pcl);
 	/* drop a refcount added by allocpage (then 2 refs in total here) */
-	folio_put(zbv.folio);
+	folio_put(folio);
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,