2021-01-26 16:33:47 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
|
|
|
#ifndef BTRFS_SUBPAGE_H
|
|
|
|
#define BTRFS_SUBPAGE_H
|
|
|
|
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
|
|
|
|
/*
 * Maximum page size we support is 64K, minimum sector size is 4K, so a page
 * holds at most 64K / 4K = 16 sectors and a u16 bitmap is sufficient.
 * Regular bitmap_* is not used due to size reasons.
 */
#define BTRFS_SUBPAGE_BITMAP_SIZE	16
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Structure to trace status of each sector inside a page, attached to
|
|
|
|
* page::private for both data and metadata inodes.
|
|
|
|
*/
|
|
|
|
struct btrfs_subpage {
|
|
|
|
/* Common members for both data and metadata pages */
|
|
|
|
spinlock_t lock;
|
2021-01-26 16:33:52 +08:00
|
|
|
u16 uptodate_bitmap;
|
2021-01-26 16:33:53 +08:00
|
|
|
u16 error_bitmap;
|
2021-01-26 16:33:48 +08:00
|
|
|
union {
|
btrfs: support subpage for extent buffer page release
In btrfs_release_extent_buffer_pages(), we need to add extra handling
for subpage.
Introduce a helper, detach_extent_buffer_page(), to do different
handling for regular and subpage cases.
For subpage case, handle detaching page private.
For unmapped (dummy or cloned) ebs, we can detach the page private
immediately as the page can only be attached to one unmapped eb.
For mapped ebs, we have to ensure there are no eb in the page range
before we delete it, as page->private is shared between all ebs in the
same page.
But there is a subpage specific race, where we can race with extent
buffer allocation, and clear the page private while new eb is still
being utilized, like this:
Extent buffer A is the new extent buffer which will be allocated,
while extent buffer B is the last existing extent buffer of the page.
T1 (eb A) | T2 (eb B)
-------------------------------+------------------------------
alloc_extent_buffer() | btrfs_release_extent_buffer_pages()
|- p = find_or_create_page() | |
|- attach_extent_buffer_page() | |
| | |- detach_extent_buffer_page()
| | |- if (!page_range_has_eb())
| | | No new eb in the page range yet
| | | As new eb A hasn't yet been
| | | inserted into radix tree.
| | |- btrfs_detach_subpage()
| | |- detach_page_private();
|- radix_tree_insert() |
Then we have a metadata eb whose page has no private bit.
To avoid such race, we introduce a subpage metadata-specific member,
btrfs_subpage::eb_refs.
In alloc_extent_buffer() we increase eb_refs in the critical section of
private_lock. Then page_range_has_eb() will return true for
detach_extent_buffer_page(), and will not detach page private.
The section is marked by:
- btrfs_page_inc_eb_refs()
- btrfs_page_dec_eb_refs()
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-01-26 16:33:50 +08:00
|
|
|
/*
|
|
|
|
* Structures only used by metadata
|
|
|
|
*
|
|
|
|
* @eb_refs should only be operated under private_lock, as it
|
|
|
|
* manages whether the subpage can be detached.
|
|
|
|
*/
|
|
|
|
atomic_t eb_refs;
|
2021-01-26 16:33:48 +08:00
|
|
|
/* Structures only used by data */
|
btrfs: integrate page status update for data read path into begin/end_page_read
In btrfs data page read path, the page status update are handled in two
different locations:
btrfs_do_read_page()
{
while (cur <= end) {
/* No need to read from disk */
if (HOLE/PREALLOC/INLINE){
memset();
set_extent_uptodate();
continue;
}
/* Read from disk */
ret = submit_extent_page(end_bio_extent_readpage);
}
end_bio_extent_readpage()
{
endio_readpage_uptodate_page_status();
}
This is fine for sectorsize == PAGE_SIZE case, as for above loop we
should only hit one branch and then exit.
But for subpage, there is more work to be done in page status update:
- Page Unlock condition
Unlike regular page size == sectorsize case, we can no longer just
unlock a page.
Only the last reader of the page can unlock the page.
This means, we can unlock the page either in the while() loop, or in
the endio function.
- Page uptodate condition
Since we have multiple sectors to read for a page, we can only mark
the full page uptodate if all sectors are uptodate.
To handle both subpage and regular cases, introduce a pair of functions
to help handling page status update:
- begin_page_read()
For regular case, it does nothing.
For subpage case, it updates the reader counters so that later
end_page_read() can know who is the last one to unlock the page.
- end_page_read()
This is just endio_readpage_uptodate_page_status() renamed.
The original name is a little too long and too specific for endio.
The new thing added is the condition for page unlock.
Now for subpage data, we unlock the page if we're the last reader.
This does not only provide the basis for subpage data read, but also
hide the special handling of page read from the main read loop.
Also, since we're changing how the page lock is handled, there are two
existing error paths where we need to manually unlock the page before
calling begin_page_read().
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-02-02 10:28:36 +08:00
|
|
|
struct {
|
|
|
|
atomic_t readers;
|
|
|
|
};
|
2021-01-26 16:33:48 +08:00
|
|
|
};
|
2021-01-26 16:33:47 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Kind of page a btrfs_subpage is created for; selects which half of the
 * metadata/data union inside struct btrfs_subpage is in use.
 */
enum btrfs_subpage_type {
	BTRFS_SUBPAGE_METADATA,
	BTRFS_SUBPAGE_DATA,
};
|
|
|
|
|
|
|
|
/*
 * Attach subpage tracking data of the given @type to @page.
 *
 * NOTE(review): presumably returns 0 on success and a negative errno on
 * failure, and presumably does nothing for pages holding a single sector --
 * confirm against the implementation.
 */
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
			 struct page *page, enum btrfs_subpage_type type);
|
|
|
|
/*
 * Remove the subpage tracking data from @page.
 *
 * For metadata pages the private data is shared by every extent buffer in the
 * page, so callers must make sure no extent buffer still relies on it.
 *
 * NOTE(review): expected to end up in detach_page_private() -- confirm
 * against the implementation.
 */
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
			  struct page *page);
|
|
|
|
|
2021-01-26 16:33:48 +08:00
|
|
|
/*
 * Allocate additional data where page represents more than one sector.
 *
 * The new structure is passed back through @ret and is set up for @type.
 *
 * NOTE(review): return convention presumed to be 0 on success and a negative
 * errno on failure -- confirm against the implementation.
 */
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
			struct btrfs_subpage **ret,
			enum btrfs_subpage_type type);
|
|
|
|
/*
 * NOTE(review): presumably releases a structure obtained from
 * btrfs_alloc_subpage() -- confirm against the implementation.
 */
void btrfs_free_subpage(struct btrfs_subpage *subpage);
|
|
|
|
|
btrfs: support subpage for extent buffer page release
In btrfs_release_extent_buffer_pages(), we need to add extra handling
for subpage.
Introduce a helper, detach_extent_buffer_page(), to do different
handling for regular and subpage cases.
For subpage case, handle detaching page private.
For unmapped (dummy or cloned) ebs, we can detach the page private
immediately as the page can only be attached to one unmapped eb.
For mapped ebs, we have to ensure there are no ebs in the page range
before we delete it, as page->private is shared between all ebs in the
same page.
But there is a subpage specific race, where we can race with extent
buffer allocation, and clear the page private while new eb is still
being utilized, like this:
Extent buffer A is the new extent buffer which will be allocated,
while extent buffer B is the last existing extent buffer of the page.
T1 (eb A)                      | T2 (eb B)
-------------------------------+------------------------------
alloc_extent_buffer()          | btrfs_release_extent_buffer_pages()
|- p = find_or_create_page()   | |
|- attach_extent_buffer_page() | |
|                              | |- detach_extent_buffer_page()
|                              |    |- if (!page_range_has_eb())
|                              |    |  No new eb in the page range yet
|                              |    |  As new eb A hasn't yet been
|                              |    |  inserted into radix tree.
|                              |    |- btrfs_detach_subpage()
|                              |       |- detach_page_private();
|- radix_tree_insert()         |
Then we have a metadata eb whose page has no private bit.
To avoid such race, we introduce a subpage metadata-specific member,
btrfs_subpage::eb_refs.
In alloc_extent_buffer() we increase eb_refs in the critical section of
private_lock. Then page_range_has_eb() will return true for
detach_extent_buffer_page(), and will not detach page private.
The section is marked by:
- btrfs_page_inc_eb_refs()
- btrfs_page_dec_eb_refs()
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-01-26 16:33:50 +08:00
|
|
|
/*
 * Take a reference on btrfs_subpage::eb_refs of a metadata @page.
 *
 * Meant to be called under private_lock while a new extent buffer is being
 * set up, so that a concurrent release of the last existing extent buffer in
 * the page does not detach the page private underneath it.
 *
 * NOTE(review): presumably a no-op when the page is not a subpage metadata
 * page -- confirm against the implementation.
 */
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page);
|
|
|
|
/*
 * Drop a reference on btrfs_subpage::eb_refs of a metadata @page, ending the
 * section opened by btrfs_page_inc_eb_refs().
 *
 * Like every eb_refs update this is meant to run under private_lock.
 *
 * NOTE(review): presumably a no-op when the page is not a subpage metadata
 * page -- confirm against the implementation.
 */
void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page);
|
|
|
|
|
btrfs: integrate page status update for data read path into begin/end_page_read
In the btrfs data page read path, page status updates are handled in two
different locations:
btrfs_do_read_page()
{
while (cur <= end) {
/* No need to read from disk */
if (HOLE/PREALLOC/INLINE){
memset();
set_extent_uptodate();
continue;
}
/* Read from disk */
ret = submit_extent_page(end_bio_extent_readpage);
}
end_bio_extent_readpage()
{
endio_readpage_uptodate_page_status();
}
This is fine for sectorsize == PAGE_SIZE case, as for above loop we
should only hit one branch and then exit.
But for subpage, there is more work to be done in page status update:
- Page Unlock condition
Unlike regular page size == sectorsize case, we can no longer just
unlock a page.
Only the last reader of the page can unlock the page.
This means, we can unlock the page either in the while() loop, or in
the endio function.
- Page uptodate condition
Since we have multiple sectors to read for a page, we can only mark
the full page uptodate if all sectors are uptodate.
To handle both subpage and regular cases, introduce a pair of functions
to help handling page status update:
- begin_page_read()
For regular case, it does nothing.
For subpage case, it updates the reader counters so that later
end_page_read() can know who is the last one to unlock the page.
- end_page_read()
This is just endio_readpage_uptodate_page_status() renamed.
The original name is a little too long and too specific for endio.
The new thing added is the condition for page unlock.
Now for subpage data, we unlock the page if we're the last reader.
This does not only provide the basis for subpage data read, but also
hide the special handling of page read from the main read loop.
Also, since we're changing how the page lock is handled, there are two
existing error paths where we need to manually unlock the page before
calling begin_page_read().
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-02-02 10:28:36 +08:00
|
|
|
/*
 * Account readers for the range [@start, @start + @len) of a data @page.
 *
 * As a btrfs_subpage_*() helper it is for pages that have subpage data
 * attached, with the range lying inside the page.
 *
 * NOTE(review): presumably raises btrfs_subpage::readers by the number of
 * sectors in the range -- confirm against the implementation.
 */
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len);
|
|
|
|
/*
 * Finish reading the range [@start, @start + @len) of a data @page.
 *
 * As a btrfs_subpage_*() helper it is for pages that have subpage data
 * attached, with the range lying inside the page.
 *
 * NOTE(review): presumably lowers btrfs_subpage::readers and unlocks the
 * page once the last reader is done -- confirm against the implementation.
 */
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
			      struct page *page, u64 start, u32 len);
|
|
|
|
|
2021-01-26 16:33:52 +08:00
|
|
|
/*
 * Template for subpage related operations.
 *
 * btrfs_subpage_*() are for call sites where the page has subpage attached and
 * the range is ensured to be inside the page.
 *
 * btrfs_page_*() are for call sites where the page can either be subpage
 * specific or regular page. The function will handle both cases.
 * But the range still needs to be inside the page.
 *
 * For a given @name this declares the set/clear/test helpers in both
 * flavours. The expansion deliberately has no trailing semicolon: the
 * invocation supplies it, so "DECLARE_BTRFS_SUBPAGE_OPS(x);" does not leave
 * an empty declaration behind at file scope.
 */
#define DECLARE_BTRFS_SUBPAGE_OPS(name)					\
void btrfs_subpage_set_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len);			\
void btrfs_subpage_clear_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len);			\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len);			\
void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,		\
		struct page *page, u64 start, u32 len);			\
void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len);			\
bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len)
|
|
|
|
|
|
|
|
/*
 * Declares btrfs_subpage_{set,clear,test}_uptodate() and
 * btrfs_page_{set,clear,test}_uptodate().
 */
DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
|
2021-01-26 16:33:53 +08:00
|
|
|
/*
 * Declares btrfs_subpage_{set,clear,test}_error() and
 * btrfs_page_{set,clear,test}_error().
 */
DECLARE_BTRFS_SUBPAGE_OPS(error);
|
2021-01-26 16:33:52 +08:00
|
|
|
|
2021-01-26 16:33:47 +08:00
|
|
|
#endif
|