diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1ea896887ee4..b70124b9c7c1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -142,6 +142,15 @@ enum pageflags { PG_readahead = PG_reclaim, + /* + * Depending on the way an anonymous folio can be mapped into a page + * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped + * THP), PG_anon_exclusive may be set only for the head page or for + * tail pages of an anonymous folio. For now, we only expect it to be + * set on tail pages for PTE-mapped THP. + */ + PG_anon_exclusive = PG_mappedtodisk, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -176,7 +185,7 @@ enum pageflags { * Indicates that at least one subpage is hwpoisoned in the * THP. */ - PG_has_hwpoisoned = PG_mappedtodisk, + PG_has_hwpoisoned = PG_error, #endif /* non-lru isolated movable page */ @@ -1002,6 +1011,34 @@ extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); +static __always_inline int PageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void SetPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void ClearPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void __ClearPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a6052a984e9..03cbb75bcb54 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1677,6 +1677,8 @@ void free_huge_page(struct page *page) VM_BUG_ON_PAGE(page_mapcount(page), page); hugetlb_set_page_subpool(page, NULL); + if (PageAnon(page)) + __ClearPageAnonExclusive(page); page->mapping = NULL; restore_reserve = HPageRestoreReserve(page); ClearHPageRestoreReserve(page); diff --git a/mm/memory.c b/mm/memory.c index d4dba178e130..0b0727758c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3667,6 +3667,17 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_nomap; } + /* + * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte + * must never point at an anonymous page in the swapcache that is + * PG_anon_exclusive. Sanity check that this holds and especially, that + * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity + * check after taking the PT lock and making sure that nobody + * concurrently faulted in this page and set PG_anon_exclusive. + */ + BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); + BUG_ON(PageAnon(page) && PageAnonExclusive(page)); + /* * Remove the swap entry and conditionally try to free up the swapcache. * We're already holding a reference on the page but haven't mapped it diff --git a/mm/memremap.c b/mm/memremap.c index c33bcd0398c9..2b92e97cb25b 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -459,6 +459,15 @@ void free_zone_device_page(struct page *page) mem_cgroup_uncharge(page_folio(page)); + /* + * Note: we don't expect anonymous compound pages yet. Once supported + * and we could PTE-map them similar to THP, we'd have to clear + * PG_anon_exclusive on all tail pages. + */ + VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page); + if (PageAnon(page)) + __ClearPageAnonExclusive(page); + /* * When a device managed page is freed, the page->mapping field * may still contain a (stale) mapping value. For example, the diff --git a/mm/swapfile.c b/mm/swapfile.c index 0ad7ed7ded21..a7847324d476 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1796,6 +1796,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } + /* See do_swap_page() */ + BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); + BUG_ON(PageAnon(page) && PageAnonExclusive(page)); + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index b1ed76d9a979..381dcc00cb62 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -80,9 +80,10 @@ #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 -/* [48-] take some arbitrary free slots for expanding overloaded flags +/* [47-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API */ +#define KPF_ANON_EXCLUSIVE 47 #define KPF_READAHEAD 48 #define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 @@ -138,6 +139,7 @@ static const char * const page_flag_names[] = { [KPF_SOFTDIRTY] = "f:softdirty", [KPF_ARCH_2] = "H:arch_2", + [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", [KPF_SLUB_FROZEN] = "A:slub_frozen", @@ -472,6 +474,10 @@ static int bit_mask_ok(uint64_t flags) static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) { + /* Anonymous pages overload PG_mappedtodisk */ + if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) + flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); + /* SLOB/SLUB overload several page flags */ if (flags & BIT(SLAB)) { if (flags & BIT(PRIVATE))