Merge branch 'mm-hotfixes-stable' into mm-stable to pick up "mm: fix
crashes from deferred split racing folio migration", needed by "mm: migrate: split folio_migrate_mapping()".
commit 8ef6fd0e9e

 .mailmap | 1

--- a/.mailmap
+++ b/.mailmap
@@ -384,6 +384,7 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
 Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
 Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
+Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
 Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
 Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
 Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14476,7 +14476,7 @@ MEMORY MAPPING
 M:	Andrew Morton <akpm@linux-foundation.org>
 R:	Liam R. Howlett <Liam.Howlett@oracle.com>
 R:	Vlastimil Babka <vbabka@suse.cz>
-R:	Lorenzo Stoakes <lstoakes@gmail.com>
+R:	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 W:	http://www.linux-mm.org
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -383,11 +383,39 @@ found:
 
 struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
 {
-	struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+	struct folio *folio;
+	struct nilfs_dir_entry *de, *next_de;
+	size_t limit;
+	char *msg;
 
+	de = nilfs_get_folio(dir, 0, &folio);
 	if (IS_ERR(de))
 		return NULL;
-	return nilfs_next_entry(de);
+
+	limit = nilfs_last_byte(dir, 0);	/* is a multiple of chunk size */
+	if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+		     !nilfs_match(1, ".", de))) {
+		msg = "missing '.'";
+		goto fail;
+	}
+
+	next_de = nilfs_next_entry(de);
+	/*
+	 * If "next_de" has not reached the end of the chunk, there is
+	 * at least one more record. Check whether it matches "..".
+	 */
+	if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+		     !nilfs_match(2, "..", next_de))) {
+		msg = "missing '..'";
+		goto fail;
+	}
+	*foliop = folio;
+	return next_de;
+
+fail:
+	nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+	folio_release_kmap(folio, de);
+	return NULL;
 }
 
 ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 		goto out;
 	features = uffdio_api.features;
 	ret = -EINVAL;
-	if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+	if (uffdio_api.api != UFFD_API)
 		goto err_out;
 	ret = -EPERM;
 	if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
 	uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
 #endif
+
+	ret = -EINVAL;
+	if (features & ~uffdio_api.features)
+		goto err_out;
+
 	uffdio_api.ioctls = UFFD_API_IOCTLS;
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
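The reordering above means the API handshake rejects requested feature bits only after the kernel has masked off what this build cannot provide. A standalone C sketch of that negotiation order follows; the feature names and the negotiate() helper are invented for illustration and are not the kernel's UFFD_* definitions.

#include <stdint.h>
#include <stdio.h>

#define FEAT_A (1u << 0)
#define FEAT_B (1u << 1)
#define FEAT_C (1u << 2)	/* pretend this one is compiled out */

/* Mask unsupported bits first, then validate the caller's request. */
static int negotiate(uint32_t requested, uint32_t *granted)
{
	uint32_t supported = FEAT_A | FEAT_B | FEAT_C;

	supported &= ~FEAT_C;		/* like clearing UFFD_FEATURE_WP_ASYNC above */
	if (requested & ~supported)	/* like: if (features & ~uffdio_api.features) */
		return -1;
	*granted = supported;
	return 0;
}

int main(void)
{
	uint32_t granted;

	printf("request A   -> %d\n", negotiate(FEAT_A, &granted));		/* 0  */
	printf("request A|C -> %d\n", negotiate(FEAT_A | FEAT_C, &granted));	/* -1 */
	return 0;
}
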
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1981,8 +1981,9 @@ static inline int subsection_map_index(unsigned long pfn)
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
 {
 	int idx = subsection_map_index(pfn);
+	struct mem_section_usage *usage = READ_ONCE(ms->usage);
 
-	return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+	return usage ? test_bit(idx, usage->subsection_map) : 0;
 }
 #else
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
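The new pfn_section_valid() loads ms->usage once and tests the snapshot before dereferencing it, so a walker racing with memory hot-remove sees either a valid pointer or NULL, never a torn pair of loads. A small userspace sketch of the same read-once-then-check pattern, with C11 atomics standing in for READ_ONCE (types and names here are illustrative only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct usage {
	unsigned long subsection_map;
};

struct section {
	_Atomic(struct usage *) usage;	/* may be set to NULL concurrently */
};

static bool section_bit_set(struct section *s, int idx)
{
	/* Load the pointer exactly once ... */
	struct usage *u = atomic_load_explicit(&s->usage, memory_order_relaxed);

	/* ... and only dereference the snapshot if it is non-NULL. */
	return u ? (u->subsection_map >> idx) & 1 : false;
}

int main(void)
{
	struct usage u = { .subsection_map = 0x5 };
	struct section s;

	atomic_store(&s.usage, &u);
	printf("%d %d\n", section_bit_set(&s, 0), section_bit_set(&s, 1));
	atomic_store(&s.usage, (struct usage *)NULL);
	printf("%d\n", section_bit_set(&s, 0));
	return 0;
}
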
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
 
 static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
-	bool ret = atomic_add_unless(&page->_refcount, nr, u);
+	bool ret = false;
+
+	rcu_read_lock();
+	/* avoid writing to the vmemmap area being remapped */
+	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+		ret = atomic_add_unless(&page->_refcount, nr, u);
+	rcu_read_unlock();
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
 	return folio_ref_add_unless(folio, 1, 0);
 }
 
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-#ifdef CONFIG_TINY_RCU
-	/*
-	 * The caller guarantees the folio will not be freed from interrupt
-	 * context, so (on !SMP) we only need preemption to be disabled
-	 * and TINY_RCU does that for us.
-	 */
-# ifdef CONFIG_PREEMPT_COUNT
-	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-	VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
-	folio_ref_add(folio, count);
-#else
-	if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
-		/* Either the folio has been freed, or will be freed. */
-		return false;
-	}
-#endif
-	return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context). We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables. In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
-{
-	return folio_ref_try_add_rcu(folio, 1);
+	return folio_ref_add_unless(folio, count, 0);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
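folio_ref_try_add() above now reduces, on every configuration, to folio_ref_add_unless(), i.e. "take a reference only if the count is not already zero". A userspace sketch of that primitive using C11 atomics; this is illustrative, not the kernel's implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Succeed only if the refcount is currently non-zero. */
static bool ref_get_unless_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0) {
		/* On failure, 'old' is reloaded and the loop retries. */
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return true;
	}
	return false;	/* object already hit zero; do not resurrect it */
}

int main(void)
{
	atomic_int live = 3, dying = 0;

	printf("live:  %d\n", ref_get_unless_zero(&live));	/* 1 */
	printf("dying: %d\n", ref_get_unless_zero(&dying));	/* 0 */
	return 0;
}
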
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  * a good order (that's 1MB if you're using 4kB pages)
  */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
 #else
-#define MAX_PAGECACHE_ORDER	8
+#define PREFERRED_MAX_PAGECACHE_ORDER	8
 #endif
 
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER		(XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER	min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
 }
 
 /* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+				bool flush);
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
 void workingset_refault(struct folio *folio, void *shadow);
@@ -38,7 +38,9 @@ close IN_FILE || die;
 #
 open C_FILE, ">$ARGV[1]" or die;
 print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
+my $scriptname = $0;
+$scriptname =~ s#^\Q$abs_srctree/\E##;
+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
 print C_FILE " */\n";
 
 #
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1694,14 +1694,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
  * access frequencies are similar. This is for minimizing the monitoring
  * overhead under the dynamically changeable access pattern. If a merge was
  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could be higher than the user-defined limit,
+ * max_nr_regions for some cases. For example, the user can update
+ * max_nr_regions to a number that lower than the current number of regions
+ * while DAMON is running. For such a case, repeat merging until the limit is
+ * met while increasing @threshold up to possible maximum level.
  */
 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 				  unsigned long sz_limit)
 {
 	struct damon_target *t;
+	unsigned int nr_regions;
+	unsigned int max_thres;
 
-	damon_for_each_target(t, c)
-		damon_merge_regions_of(t, threshold, sz_limit);
+	max_thres = c->attrs.aggr_interval /
+		(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+	do {
+		nr_regions = 0;
+		damon_for_each_target(t, c) {
+			damon_merge_regions_of(t, threshold, sz_limit);
+			nr_regions += damon_nr_regions(t);
+		}
+		threshold = max(1, threshold * 2);
+	} while (nr_regions > c->attrs.max_nr_regions &&
+			threshold / 2 < max_thres);
 }
 
 /*
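The loop added above merges, recounts, doubles the threshold, and repeats until the region count is no longer above max_nr_regions or the threshold reaches the computed ceiling. The following self-contained C model reproduces just that control flow; the sample data, merge rule, and limits are invented for illustration and this is not DAMON code.

#include <stdio.h>

/* Merge an ascending run of access counts: fold a value into the previous
 * kept region when it differs from it by at most 'threshold'. */
static int merge_pass(int *counts, int n, int threshold)
{
	int out = 0;

	for (int i = 0; i < n; i++) {
		if (out && counts[i] - counts[out - 1] <= threshold)
			continue;	/* merged into the previous region */
		counts[out++] = counts[i];
	}
	return out;
}

int main(void)
{
	int counts[] = { 1, 2, 9, 10, 30, 31, 60, 90 };
	int n = sizeof(counts) / sizeof(counts[0]);
	int threshold = 1, max_threshold = 64, limit = 3;

	do {
		n = merge_pass(counts, n, threshold);
		printf("threshold %2d -> %d regions\n", threshold, n);
		threshold *= 2;		/* like: threshold = max(1, threshold * 2) */
	} while (n > limit && threshold / 2 < max_threshold);
	return 0;
}
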
 mm/filemap.c | 17

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1847,7 +1847,7 @@ repeat:
 	if (!folio || xa_is_value(folio))
 		goto out;
 
-	if (!folio_try_get_rcu(folio))
+	if (!folio_try_get(folio))
 		goto repeat;
 
 	if (unlikely(folio != xas_reload(&xas))) {
@@ -2001,7 +2001,7 @@ retry:
 	if (!folio || xa_is_value(folio))
 		return folio;
 
-	if (!folio_try_get_rcu(folio))
+	if (!folio_try_get(folio))
 		goto reset;
 
 	if (unlikely(folio != xas_reload(xas))) {
@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
 		if (xa_is_value(folio))
 			goto update_start;
 
-		if (!folio_try_get_rcu(folio))
+		if (!folio_try_get(folio))
 			goto retry;
 
 		if (unlikely(folio != xas_reload(&xas)))
@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
 			break;
 		if (xa_is_sibling(folio))
 			break;
-		if (!folio_try_get_rcu(folio))
+		if (!folio_try_get(folio))
 			goto retry;
 
 		if (unlikely(folio != xas_reload(&xas)))
@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* Use the readahead code, even if readahead is disabled */
-	if (vm_flags & VM_HUGEPAGE) {
+	if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
 		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
 		ra->size = HPAGE_PMD_NR;
@@ -3472,7 +3472,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
 			continue;
 		if (folio_test_locked(folio))
 			continue;
-		if (!folio_try_get_rcu(folio))
+		if (!folio_try_get(folio))
 			continue;
 		/* Has the page moved or been split? */
 		if (unlikely(folio != xas_reload(xas)))
@@ -4248,6 +4248,9 @@ static void filemap_cachestat(struct address_space *mapping,
 	XA_STATE(xas, &mapping->i_pages, first_index);
 	struct folio *folio;
 
+	/* Flush stats (and potentially sleep) outside the RCU read section. */
+	mem_cgroup_flush_stats_ratelimited(NULL);
+
 	rcu_read_lock();
 	xas_for_each(&xas, folio, last_index) {
 		int order;
@@ -4311,7 +4314,7 @@ static void filemap_cachestat(struct address_space *mapping,
 				goto resched;
 			}
 #endif
-			if (workingset_test_recent(shadow, true, &workingset))
+			if (workingset_test_recent(shadow, true, &workingset, false))
 				cs->nr_recently_evicted += nr_pages;
 
 			goto resched;
 mm/gup.c | 291

--- a/mm/gup.c
+++ b/mm/gup.c
@@ -76,7 +76,7 @@ retry:
 	folio = page_folio(page);
 	if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
 		return NULL;
-	if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
+	if (unlikely(!folio_ref_try_add(folio, refs)))
 		return NULL;
 
 	/*
@@ -97,95 +97,6 @@ retry:
 	return folio;
 }
 
-/**
- * try_grab_folio() - Attempt to get or pin a folio.
- * @page:  pointer to page to be grabbed
- * @refs:  the value to (effectively) add to the folio's refcount
- * @flags: gup flags: these are the FOLL_* flag values.
- *
- * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
- *
- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
- * same time. (That's true throughout the get_user_pages*() and
- * pin_user_pages*() APIs.) Cases:
- *
- *    FOLL_GET: folio's refcount will be incremented by @refs.
- *
- *    FOLL_PIN on large folios: folio's refcount will be incremented by
- *    @refs, and its pincount will be incremented by @refs.
- *
- *    FOLL_PIN on single-page folios: folio's refcount will be incremented by
- *    @refs * GUP_PIN_COUNTING_BIAS.
- *
- * Return: The folio containing @page (with refcount appropriately
- * incremented) for success, or NULL upon failure. If neither FOLL_GET
- * nor FOLL_PIN was set, that's considered failure, and furthermore,
- * a likely bug in the caller, so a warning is also emitted.
- */
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
-{
-	struct folio *folio;
-
-	if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
-		return NULL;
-
-	if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
-		return NULL;
-
-	if (flags & FOLL_GET)
-		return try_get_folio(page, refs);
-
-	/* FOLL_PIN is set */
-
-	/*
-	 * Don't take a pin on the zero page - it's not going anywhere
-	 * and it is used in a *lot* of places.
-	 */
-	if (is_zero_page(page))
-		return page_folio(page);
-
-	folio = try_get_folio(page, refs);
-	if (!folio)
-		return NULL;
-
-	/*
-	 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
-	 * right zone, so fail and let the caller fall back to the slow
-	 * path.
-	 */
-	if (unlikely((flags & FOLL_LONGTERM) &&
-		     !folio_is_longterm_pinnable(folio))) {
-		if (!put_devmap_managed_folio_refs(folio, refs))
-			folio_put_refs(folio, refs);
-		return NULL;
-	}
-
-	/*
-	 * When pinning a large folio, use an exact count to track it.
-	 *
-	 * However, be sure to *also* increment the normal folio
-	 * refcount field at least once, so that the folio really
-	 * is pinned.  That's why the refcount from the earlier
-	 * try_get_folio() is left intact.
-	 */
-	if (folio_test_large(folio))
-		atomic_add(refs, &folio->_pincount);
-	else
-		folio_ref_add(folio,
-				refs * (GUP_PIN_COUNTING_BIAS - 1));
-	/*
-	 * Adjust the pincount before re-checking the PTE for changes.
-	 * This is essentially a smp_mb() and is paired with a memory
-	 * barrier in folio_try_share_anon_rmap_*().
-	 */
-	smp_mb__after_atomic();
-
-	node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
-
-	return folio;
-}
-
 static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
 {
 	if (flags & FOLL_PIN) {
@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
 }
 
 /**
- * try_grab_page() - elevate a page's refcount by a flag-dependent amount
- * @page:    pointer to page to be grabbed
- * @flags:   gup flags: these are the FOLL_* flag values.
+ * try_grab_folio() - add a folio's refcount by a flag-dependent amount
+ * @folio:    pointer to folio to be grabbed
+ * @refs:     the value to (effectively) add to the folio's refcount
+ * @flags:    gup flags: these are the FOLL_* flag values
  *
  * This might not do anything at all, depending on the flags argument.
  *
  * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
  *
  * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_folio() documentation, with
- * "refs=1".
+ * time.
  *
  * Return: 0 for success, or if no action was required (if neither FOLL_PIN
  * nor FOLL_GET was set, nothing is done). A negative error code for failure:
  *
- *   -ENOMEM		FOLL_GET or FOLL_PIN was set, but the page could not
+ *   -ENOMEM		FOLL_GET or FOLL_PIN was set, but the folio could not
  *			be grabbed.
+ *
+ * It is called when we have a stable reference for the folio, typically in
+ * GUP slow path.
  */
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_folio(struct folio *folio, int refs,
+				unsigned int flags)
 {
-	struct folio *folio = page_folio(page);
-
 	if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
 		return -ENOMEM;
 
-	if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+	if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
 		return -EREMOTEIO;
 
 	if (flags & FOLL_GET)
-		folio_ref_inc(folio);
+		folio_ref_add(folio, refs);
 	else if (flags & FOLL_PIN) {
 		/*
 		 * Don't take a pin on the zero page - it's not going anywhere
 		 * and it is used in a *lot* of places.
 		 */
-		if (is_zero_page(page))
+		if (is_zero_folio(folio))
 			return 0;
 
 		/*
-		 * Similar to try_grab_folio(): be sure to *also*
-		 * increment the normal page refcount field at least once,
+		 * Increment the normal page refcount field at least once,
 		 * so that the page really is pinned.
 		 */
 		if (folio_test_large(folio)) {
-			folio_ref_add(folio, 1);
-			atomic_add(1, &folio->_pincount);
+			folio_ref_add(folio, refs);
+			atomic_add(refs, &folio->_pincount);
 		} else {
-			folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+			folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
 		}
 
-		node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+		node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
 	}
 
 	return 0;
@@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz,
 
 	return nr;
 }
+
+/**
+ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
+ * @page:  pointer to page to be grabbed
+ * @refs:  the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values.
+ *
+ * "grab" names in this file mean, "look at flags to decide whether to use
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+ *
+ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
+ * same time. (That's true throughout the get_user_pages*() and
+ * pin_user_pages*() APIs.) Cases:
+ *
+ *    FOLL_GET: folio's refcount will be incremented by @refs.
+ *
+ *    FOLL_PIN on large folios: folio's refcount will be incremented by
+ *    @refs, and its pincount will be incremented by @refs.
+ *
+ *    FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ *    @refs * GUP_PIN_COUNTING_BIAS.
+ *
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
+ *
+ * It uses add ref unless zero to elevate the folio refcount and must be called
+ * in fast path only.
+ */
+static struct folio *try_grab_folio_fast(struct page *page, int refs,
+					 unsigned int flags)
+{
+	struct folio *folio;
+
+	/* Raise warn if it is not called in fast GUP */
+	VM_WARN_ON_ONCE(!irqs_disabled());
+
+	if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
+		return NULL;
+
+	if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+		return NULL;
+
+	if (flags & FOLL_GET)
+		return try_get_folio(page, refs);
+
+	/* FOLL_PIN is set */
+
+	/*
+	 * Don't take a pin on the zero page - it's not going anywhere
+	 * and it is used in a *lot* of places.
+	 */
+	if (is_zero_page(page))
+		return page_folio(page);
+
+	folio = try_get_folio(page, refs);
+	if (!folio)
+		return NULL;
+
+	/*
+	 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+	 * right zone, so fail and let the caller fall back to the slow
+	 * path.
+	 */
+	if (unlikely((flags & FOLL_LONGTERM) &&
+		     !folio_is_longterm_pinnable(folio))) {
+		if (!put_devmap_managed_folio_refs(folio, refs))
+			folio_put_refs(folio, refs);
+		return NULL;
+	}
+
+	/*
+	 * When pinning a large folio, use an exact count to track it.
+	 *
+	 * However, be sure to *also* increment the normal folio
+	 * refcount field at least once, so that the folio really
+	 * is pinned.  That's why the refcount from the earlier
+	 * try_get_folio() is left intact.
+	 */
+	if (folio_test_large(folio))
+		atomic_add(refs, &folio->_pincount);
+	else
+		folio_ref_add(folio,
+				refs * (GUP_PIN_COUNTING_BIAS - 1));
+	/*
+	 * Adjust the pincount before re-checking the PTE for changes.
+	 * This is essentially a smp_mb() and is paired with a memory
+	 * barrier in folio_try_share_anon_rmap_*().
+	 */
+	smp_mb__after_atomic();
+
+	node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+
+	return folio;
+}
 #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
 
 #ifdef CONFIG_ARCH_HAS_HUGEPD
@@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
  */
 static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
 		       unsigned long addr, unsigned long end, unsigned int flags,
-		       struct page **pages, int *nr)
+		       struct page **pages, int *nr, bool fast)
 {
 	unsigned long pte_end;
 	struct page *page;
@@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
 	page = pte_page(pte);
 	refs = record_subpages(page, sz, addr, end, pages + *nr);
 
-	folio = try_grab_folio(page, refs, flags);
-	if (!folio)
-		return 0;
+	if (fast) {
+		folio = try_grab_folio_fast(page, refs, flags);
+		if (!folio)
+			return 0;
+	} else {
+		folio = page_folio(page);
+		if (try_grab_folio(folio, refs, flags))
+			return 0;
+	}
 
 	if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
 		gup_put_folio(folio, refs, flags);
@@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
 static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
 		      unsigned long addr, unsigned int pdshift,
 		      unsigned long end, unsigned int flags,
-		      struct page **pages, int *nr)
+		      struct page **pages, int *nr, bool fast)
 {
 	pte_t *ptep;
 	unsigned long sz = 1UL << hugepd_shift(hugepd);
@@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
 	ptep = hugepte_offset(hugepd, addr, pdshift);
 	do {
 		next = hugepte_addr_end(addr, end, sz);
-		ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
+		ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr,
+				  fast);
 		if (ret != 1)
 			return ret;
 	} while (ptep++, addr = next, addr != end);
@@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
 	ptep = hugepte_offset(hugepd, addr, pdshift);
 	ptl = huge_pte_lock(h, vma->vm_mm, ptep);
 	ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
-			 flags, &page, &nr);
+			 flags, &page, &nr, false);
 	spin_unlock(ptl);
 
 	if (ret == 1) {
@@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
 static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
 			     unsigned long addr, unsigned int pdshift,
 			     unsigned long end, unsigned int flags,
-			     struct page **pages, int *nr)
+			     struct page **pages, int *nr, bool fast)
 {
 	return 0;
 }
@@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
 	    gup_must_unshare(vma, flags, page))
 		return ERR_PTR(-EMLINK);
 
-	ret = try_grab_page(page, flags);
+	ret = try_grab_folio(page_folio(page), 1, flags);
 	if (ret)
 		page = ERR_PTR(ret);
 	else
@@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
 	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
 			!PageAnonExclusive(page), page);
 
-	ret = try_grab_page(page, flags);
+	ret = try_grab_folio(page_folio(page), 1, flags);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
 			       !PageAnonExclusive(page), page);
 
-	/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
-	ret = try_grab_page(page, flags);
+	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
+	ret = try_grab_folio(page_folio(page), 1, flags);
 	if (unlikely(ret)) {
 		page = ERR_PTR(ret);
 		goto out;
@@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 			goto unmap;
 		*page = pte_page(entry);
 	}
-	ret = try_grab_page(*page, gup_flags);
+	ret = try_grab_folio(page_folio(*page), 1, gup_flags);
 	if (unlikely(ret))
 		goto unmap;
 out:
@@ -1636,20 +1651,19 @@ next_page:
 			 * pages.
 			 */
 			if (page_increm > 1) {
-				struct folio *folio;
+				struct folio *folio = page_folio(page);
 
 				/*
 				 * Since we already hold refcount on the
 				 * large folio, this should never fail.
 				 */
-				folio = try_grab_folio(page, page_increm - 1,
-						       foll_flags);
-				if (WARN_ON_ONCE(!folio)) {
+				if (try_grab_folio(folio, page_increm - 1,
+						   foll_flags)) {
 					/*
 					 * Release the 1st page ref if the
 					 * folio is problematic, fail hard.
 					 */
-					gup_put_folio(page_folio(page), 1,
+					gup_put_folio(folio, 1,
 						      foll_flags);
 					ret = -EFAULT;
 					goto out;
@@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  * This code is based heavily on the PowerPC implementation by Nick Piggin.
  */
 #ifdef CONFIG_HAVE_GUP_FAST
-
 /*
  * Used in the GUP-fast path to determine whether GUP is permitted to work on
  * a specific folio.
@@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 
-		folio = try_grab_folio(page, 1, flags);
+		folio = try_grab_folio_fast(page, 1, flags);
 		if (!folio)
 			goto pte_unmap;
 
@@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
 			break;
 		}
 
-		folio = try_grab_folio(page, 1, flags);
+		folio = try_grab_folio_fast(page, 1, flags);
 		if (!folio) {
 			gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
 			break;
@@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 	page = pmd_page(orig);
 	refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
 
-	folio = try_grab_folio(page, refs, flags);
+	folio = try_grab_folio_fast(page, refs, flags);
 	if (!folio)
 		return 0;
 
@@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
 	page = pud_page(orig);
 	refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
 
-	folio = try_grab_folio(page, refs, flags);
+	folio = try_grab_folio_fast(page, refs, flags);
 	if (!folio)
 		return 0;
 
@@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 	page = pgd_page(orig);
 	refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
 
-	folio = try_grab_folio(page, refs, flags);
+	folio = try_grab_folio_fast(page, refs, flags);
 	if (!folio)
 		return 0;
 
@@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 			 * pmd format and THP pmd format
 			 */
 			if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
-				       PMD_SHIFT, next, flags, pages, nr) != 1)
+				       PMD_SHIFT, next, flags, pages, nr,
+				       true) != 1)
 				return 0;
 		} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
 					       pages, nr))
@@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
 				return 0;
 		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
 			if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
-				       PUD_SHIFT, next, flags, pages, nr) != 1)
+				       PUD_SHIFT, next, flags, pages, nr,
+				       true) != 1)
 				return 0;
 		} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
 					       pages, nr))
@@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 		BUILD_BUG_ON(p4d_leaf(p4d));
 		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
 			if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
-				       P4D_SHIFT, next, flags, pages, nr) != 1)
+				       P4D_SHIFT, next, flags, pages, nr,
+				       true) != 1)
 				return 0;
 		} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
 					       pages, nr))
@@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
 			return;
 		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
 			if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
-				       PGDIR_SHIFT, next, flags, pages, nr) != 1)
+				       PGDIR_SHIFT, next, flags, pages, nr,
+				       true) != 1)
 				return;
 		} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
 					       pages, nr))
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1333,7 +1333,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
-	ret = try_grab_page(page, flags);
+	ret = try_grab_folio(page_folio(page), 1, flags);
 	if (ret)
 		page = ERR_PTR(ret);
 
 mm/hugetlb.c | 53

--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1629,13 +1629,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
  * folio appears as just a compound page. Otherwise, wait until after
  * allocating vmemmap to clear the flag.
  *
- * A reference is held on the folio, except in the case of demote.
- *
  * Must be called with hugetlb lock held.
  */
-static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
-							bool adjust_surplus,
-							bool demote)
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
+							bool adjust_surplus)
 {
 	int nid = folio_nid(folio);
 
@@ -1649,6 +1646,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 	list_del(&folio->lru);
 
 	if (folio_test_hugetlb_freed(folio)) {
+		folio_clear_hugetlb_freed(folio);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
 	}
@@ -1665,33 +1663,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 	if (!folio_test_hugetlb_vmemmap_optimized(folio))
 		__folio_clear_hugetlb(folio);
 
-	/*
-	 * In the case of demote we do not ref count the page as it will soon
-	 * be turned into a page of smaller size.
-	 */
-	if (!demote)
-		folio_ref_unfreeze(folio, 1);
-
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[nid]--;
 }
 
-static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
-							bool adjust_surplus)
-{
-	__remove_hugetlb_folio(h, folio, adjust_surplus, false);
-}
-
-static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
-							bool adjust_surplus)
-{
-	__remove_hugetlb_folio(h, folio, adjust_surplus, true);
-}
-
 static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 			      bool adjust_surplus)
 {
-	int zeroed;
 	int nid = folio_nid(folio);
 
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
@@ -1715,21 +1693,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 	 */
 	folio_set_hugetlb_vmemmap_optimized(folio);
 
-	/*
-	 * This folio is about to be managed by the hugetlb allocator and
-	 * should have no users. Drop our reference, and check for others
-	 * just in case.
-	 */
-	zeroed = folio_put_testzero(folio);
-	if (unlikely(!zeroed))
-		/*
-		 * It is VERY unlikely soneone else has taken a ref
-		 * on the folio. In this case, we simply return as
-		 * free_huge_folio() will be called when this other ref
-		 * is dropped.
-		 */
-		return;
-
 	arch_clear_hugetlb_flags(folio);
 	enqueue_hugetlb_folio(h, folio);
 }
@@ -1783,6 +1746,8 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 		spin_unlock_irq(&hugetlb_lock);
 	}
 
+	folio_ref_unfreeze(folio, 1);
+
 	/*
 	 * Non-gigantic pages demoted from CMA allocated gigantic pages
 	 * need to be given back to CMA in free_gigantic_folio.
@@ -3106,11 +3071,8 @@ retry:
 
 free_new:
 	spin_unlock_irq(&hugetlb_lock);
-	if (new_folio) {
-		/* Folio has a zero ref count, but needs a ref to be freed */
-		folio_ref_unfreeze(new_folio, 1);
+	if (new_folio)
 		update_and_free_hugetlb_folio(h, new_folio, false);
-	}
 
 	return ret;
 }
@@ -3965,7 +3927,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
 
 	target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
 
-	remove_hugetlb_folio_for_demote(h, folio, false);
+	remove_hugetlb_folio(h, folio, false);
 	spin_unlock_irq(&hugetlb_lock);
 
 	/*
@@ -3979,7 +3941,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
 	if (rc) {
 		/* Allocation of vmemmmap failed, we can not demote folio */
 		spin_lock_irq(&hugetlb_lock);
-		folio_ref_unfreeze(folio, 1);
 		add_hugetlb_folio(h, folio, false);
 		return rc;
 	}
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -455,6 +455,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
 	unsigned long vmemmap_reuse;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
 	if (!folio_test_hugetlb_vmemmap_optimized(folio))
 		return 0;
 
@@ -490,6 +492,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
  */
 int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
 {
+	/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+	synchronize_rcu();
+
 	return __hugetlb_vmemmap_restore_folio(h, folio, 0);
 }
 
@@ -514,6 +519,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
 	long restored = 0;
 	long ret = 0;
 
+	/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+	synchronize_rcu();
+
 	list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
 		if (folio_test_hugetlb_vmemmap_optimized(folio)) {
 			ret = __hugetlb_vmemmap_restore_folio(h, folio,
@@ -559,6 +567,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 	unsigned long vmemmap_reuse;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
 	if (!vmemmap_should_optimize_folio(h, folio))
 		return ret;
 
@@ -610,6 +620,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
 {
 	LIST_HEAD(vmemmap_pages);
 
+	/* avoid writes from page_ref_add_unless() while folding vmemmap */
+	synchronize_rcu();
+
 	__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
 	free_vmemmap_page_list(&vmemmap_pages);
 }
@@ -653,6 +666,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 
 	flush_tlb_all();
 
+	/* avoid writes from page_ref_add_unless() while folding vmemmap */
+	synchronize_rcu();
+
 	list_for_each_entry(folio, folio_list, lru) {
 		int ret;
 
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1226,8 +1226,8 @@ int migrate_device_coherent_page(struct page *page);
 /*
  * mm/gup.c
  */
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_folio(struct folio *folio, int refs,
+				unsigned int flags);
 
 /*
  * mm/huge_memory.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4948,17 +4948,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 
 	/* Transfer the charge and the css ref */
 	commit_charge(new, memcg);
-	/*
-	 * If the old folio is a large folio and is in the split queue, it needs
-	 * to be removed from the split queue now, in case getting an incorrect
-	 * split queue in destroy_large_folio() after the memcg of the old folio
-	 * is cleared.
-	 *
-	 * In addition, the old folio is about to be freed after migration, so
-	 * removing from the split queue a bit earlier seems reasonable.
-	 */
-	if (folio_test_large(old) && folio_test_large_rmappable(old))
-		folio_undo_large_rmappable(old);
 	old->memcg_data = 0;
 }
 
 mm/migrate.c | 13

--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping,
 		if (folio_ref_count(folio) != expected_count)
 			return -EAGAIN;
 
+		/* Take off deferred split queue while frozen and memcg set */
+		if (folio_test_large(folio) &&
+		    folio_test_large_rmappable(folio)) {
+			if (!folio_ref_freeze(folio, expected_count))
+				return -EAGAIN;
+			folio_undo_large_rmappable(folio);
+			folio_ref_unfreeze(folio, expected_count);
+		}
+
 		/* No turning back from here */
 		newfolio->index = folio->index;
 		newfolio->mapping = folio->mapping;
@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping,
 		return -EAGAIN;
 	}
 
+	/* Take off deferred split queue while frozen and memcg set */
+	if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+		folio_undo_large_rmappable(folio);
+
 	/*
 	 * Now we know that no one else is looking at the folio:
 	 * no turning back from here.
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -449,11 +449,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
 
 	limit = min(limit, index + ra->size - 1);
 
-	if (new_order < MAX_PAGECACHE_ORDER) {
+	if (new_order < MAX_PAGECACHE_ORDER)
 		new_order += 2;
-		new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
-		new_order = min_t(unsigned int, new_order, ilog2(ra->size));
-	}
+
+	new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+	new_order = min_t(unsigned int, new_order, ilog2(ra->size));
 
 	/* See comment in page_cache_ra_unbounded() */
 	nofs = memalloc_nofs_save();
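With the braces gone, both clamps now apply on every call, so even a request that starts at or above MAX_PAGECACHE_ORDER is still limited by ilog2(ra->size). A tiny standalone check of that arithmetic; the cap value 11 and the helper names are illustrative, not the kernel's definitions.

#include <stdio.h>

#define MAX_PAGECACHE_ORDER 11	/* stand-in for the PMD-order cap */

static unsigned int min_uint(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

static unsigned int ilog2_uint(unsigned int x)
{
	unsigned int r = 0;

	while ((x >>= 1) != 0)
		r++;
	return r;
}

static unsigned int pick_order(unsigned int new_order, unsigned int ra_size)
{
	if (new_order < MAX_PAGECACHE_ORDER)
		new_order += 2;

	new_order = min_uint(MAX_PAGECACHE_ORDER, new_order);
	new_order = min_uint(new_order, ilog2_uint(ra_size));
	return new_order;
}

int main(void)
{
	printf("%u\n", pick_order(11, 32));	/* 5: bounded by ilog2(32) */
	printf("%u\n", pick_order(3, 256));	/* 5: 3 + 2, caps not hit  */
	return 0;
}
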
 mm/shmem.c | 15

--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -548,8 +548,9 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
 
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
-		   struct mm_struct *mm, unsigned long vm_flags)
+static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
+			    bool shmem_huge_force, struct mm_struct *mm,
+			    unsigned long vm_flags)
 {
 	loff_t i_size;
 
@@ -580,6 +581,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
 	}
 }
 
+bool shmem_is_huge(struct inode *inode, pgoff_t index,
+		   bool shmem_huge_force, struct mm_struct *mm,
+		   unsigned long vm_flags)
+{
+	if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
+		return false;
+
+	return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags);
+}
+
 #if defined(CONFIG_SYSFS)
 static int shmem_parse_huge(const char *str)
 {
 mm/vmalloc.c | 10

--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2544,7 +2544,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
 static struct xarray *
 addr_to_vb_xa(unsigned long addr)
 {
-	int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
+	int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
+
+	/*
+	 * Please note, nr_cpu_ids points on a highest set
+	 * possible bit, i.e. we never invoke cpumask_next()
+	 * if an index points on it which is nr_cpu_ids - 1.
+	 */
+	if (!cpu_possible(index))
+		index = cpumask_next(index, cpu_possible_mask);
 
 	return &per_cpu(vmap_block_queue, index).vmap_blocks;
 }
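The hunk above hashes over nr_cpu_ids and then steps forward to a CPU that is actually possible when the mask is sparse. A standalone sketch of that index fixup follows; the mask below is made up, and the kernel does the step with cpumask_next() on cpu_possible_mask rather than a loop.

#include <stdio.h>

#define NR_CPU_IDS 8
/* Sparse "possible" mask; the highest index is always set. */
static const int possible[NR_CPU_IDS] = { 1, 0, 1, 1, 0, 1, 0, 1 };

static int pick_index(unsigned long addr)
{
	int index = addr % NR_CPU_IDS;

	/* Like: if (!cpu_possible(index)) index = cpumask_next(index, ...); */
	while (!possible[index])
		index++;
	return index;
}

int main(void)
{
	for (unsigned long addr = 0; addr < NR_CPU_IDS; addr++)
		printf("addr %lu -> index %d\n", addr, pick_index(addr));
	return 0;
}
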
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
  * @file: whether the corresponding folio is from the file lru.
  * @workingset: where the workingset value unpacked from shadow should
  * be stored.
+ * @flush: whether to flush cgroup rstat.
  *
  * Return: true if the shadow is for a recently evicted folio; false otherwise.
  */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+				bool flush)
 {
 	struct mem_cgroup *eviction_memcg;
 	struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
 
 	/*
 	 * Flush stats (and potentially sleep) outside the RCU read section.
+	 *
+	 * Note that workingset_test_recent() itself might be called in RCU read
+	 * section (for e.g, in cachestat) - these callers need to skip flushing
+	 * stats (via the flush argument).
+	 *
 	 * XXX: With per-memcg flushing and thresholding, is ratelimiting
 	 * still needed here?
 	 */
-	mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+	if (flush)
+		mem_cgroup_flush_stats_ratelimited(eviction_memcg);
 
 	eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
 	refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
 
 	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
 
-	if (!workingset_test_recent(shadow, file, &workingset))
+	if (!workingset_test_recent(shadow, file, &workingset, true))
 		return;
 
 	folio_set_active(folio);