mm: move the page fragment allocator from page_alloc into its own file

Inspired by [1], move the page fragment allocator from page_alloc
into its own c file and header file, as we are about to make more
change for it to replace another page_frag implementation in
sock.c

As this patchset is going to replace 'struct page_frag' with
'struct page_frag_cache' in sched.h, including page_frag_cache.h
in sched.h has a compiler error caused by interdependence between
mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler
error by moving 'struct page_frag_cache' to mm_types_task.h as
suggested by Alexander, see [3].

1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956920@gmail.com/
3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=YM_aiA4OomD-2wXykO42bizaWMt_HOA@mail.gmail.com/
CC: David Howells <dhowells@redhat.com>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-3-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Yunsheng Lin 2024-10-28 19:53:37 +08:00 committed by Jakub Kicinski
parent 7fef0dec41
commit 65941f10ca
9 changed files with 197 additions and 177 deletions

View File

@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
struct page_frag_cache;
void page_frag_cache_drain(struct page_frag_cache *nc);
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
gfp_t gfp_mask, unsigned int align_mask);
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align)
{
WARN_ON_ONCE(!is_power_of_2(align));
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
extern void page_frag_free(void *addr);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

View File

@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
*/
#define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page)))
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
/*
* page_private can be used on tail pages. However, PagePrivate is only
* checked by the VM on the head page. So page_private on the tail pages
@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
return folio->private;
}
struct page_frag_cache {
void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
__u16 offset;
__u16 size;
#else
__u32 offset;
#endif
/* we maintain a pagecount bias, so that we dont dirty cache line
* containing page->_refcount every time we allocate a fragment.
*/
unsigned int pagecnt_bias;
bool pfmemalloc;
};
typedef unsigned long vm_flags_t;
/*

View File

@ -8,6 +8,7 @@
* (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
*/
#include <linux/align.h>
#include <linux/types.h>
#include <asm/page.h>
@ -43,6 +44,23 @@ struct page_frag {
#endif
};
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
struct page_frag_cache {
void *va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
__u16 offset;
__u16 size;
#else
__u32 offset;
#endif
/* we maintain a pagecount bias, so that we dont dirty cache line
* containing page->_refcount every time we allocate a fragment.
*/
unsigned int pagecnt_bias;
bool pfmemalloc;
};
/* Track pages that require TLB flushes */
struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH

View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGE_FRAG_CACHE_H
#define _LINUX_PAGE_FRAG_CACHE_H
#include <linux/log2.h>
#include <linux/mm_types_task.h>
#include <linux/types.h>
void page_frag_cache_drain(struct page_frag_cache *nc);
void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
gfp_t gfp_mask, unsigned int align_mask);
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align)
{
WARN_ON_ONCE(!is_power_of_2(align));
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
void page_frag_free(void *addr);
#endif

View File

@ -31,6 +31,7 @@
#include <linux/in6.h>
#include <linux/if_packet.h>
#include <linux/llist.h>
#include <linux/page_frag_cache.h>
#include <net/flow.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_common.h>

View File

@ -65,6 +65,7 @@ page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-y += page-alloc.o
obj-y += page_frag_cache.o
obj-y += init-mm.o
obj-y += memblock.o
obj-y += $(memory-hotplug-y)

View File

@ -4836,142 +4836,6 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
* Page Fragment:
* An arbitrary-length arbitrary-offset area of memory which resides
* within a 0 or higher order page. Multiple fragments within that page
* are individually refcounted, in the page's reference counter.
*
* The page_frag functions below provide a simple allocation framework for
* page fragments. This is used by the network stack and network device
* drivers to provide a backing region of memory for use as either an
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
*/
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp_mask)
{
struct page *page = NULL;
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
#endif
if (unlikely(!page))
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
nc->va = page ? page_address(page) : NULL;
return page;
}
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->va)
return;
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
nc->va = NULL;
}
EXPORT_SYMBOL(page_frag_cache_drain);
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
if (page_ref_sub_and_test(page, count))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);
void *__page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
int offset;
if (unlikely(!nc->va)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
if (!page)
return NULL;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = size;
}
offset = nc->offset - fragsz;
if (unlikely(offset < 0)) {
page = virt_to_page(nc->va);
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
goto refill;
if (unlikely(nc->pfmemalloc)) {
free_unref_page(page, compound_order(page));
goto refill;
}
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
if (unlikely(offset < 0)) {
/*
* The caller is trying to allocate a fragment
* with fragsz > PAGE_SIZE but the cache isn't big
* enough to satisfy the request, this may
* happen in low memory conditions.
* We don't release the cache page because
* it could make memory pressure worse
* so we simply return NULL here.
*/
return NULL;
}
}
nc->pagecnt_bias--;
offset &= align_mask;
nc->offset = offset;
return nc->va + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
*/
void page_frag_free(void *addr)
{
struct page *page = virt_to_head_page(addr);
if (unlikely(put_page_testzero(page)))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);
static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{

145
mm/page_frag_cache.c Normal file
View File

@ -0,0 +1,145 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Page fragment allocator
*
* Page Fragment:
* An arbitrary-length arbitrary-offset area of memory which resides within a
* 0 or higher order page. Multiple fragments within that page are
* individually refcounted, in the page's reference counter.
*
* The page_frag functions provide a simple allocation framework for page
* fragments. This is used by the network stack and network device drivers to
* provide a backing region of memory for use as either an sk_buff->head, or to
* be used in the "frags" portion of skb_shared_info.
*/
#include <linux/export.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/page_frag_cache.h>
#include "internal.h"
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp_mask)
{
struct page *page = NULL;
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
#endif
if (unlikely(!page))
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
nc->va = page ? page_address(page) : NULL;
return page;
}
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->va)
return;
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
nc->va = NULL;
}
EXPORT_SYMBOL(page_frag_cache_drain);
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
if (page_ref_sub_and_test(page, count))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);
void *__page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
int offset;
if (unlikely(!nc->va)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
if (!page)
return NULL;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = size;
}
offset = nc->offset - fragsz;
if (unlikely(offset < 0)) {
page = virt_to_page(nc->va);
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
goto refill;
if (unlikely(nc->pfmemalloc)) {
free_unref_page(page, compound_order(page));
goto refill;
}
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
if (unlikely(offset < 0)) {
/*
* The caller is trying to allocate a fragment
* with fragsz > PAGE_SIZE but the cache isn't big
* enough to satisfy the request, this may
* happen in low memory conditions.
* We don't release the cache page because
* it could make memory pressure worse
* so we simply return NULL here.
*/
return NULL;
}
}
nc->pagecnt_bias--;
offset &= align_mask;
nc->offset = offset;
return nc->va + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
*/
void page_frag_free(void *addr)
{
struct page *page = virt_to_head_page(addr);
if (unlikely(put_page_testzero(page)))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);

View File

@ -6,12 +6,12 @@
* Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/completion.h>
#include <linux/ptr_ring.h>
#include <linux/kthread.h>
#include <linux/page_frag_cache.h>
#define TEST_FAILED_PREFIX "page_frag_test failed: "