Merge branch 'mm-hotfixes-unstable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Author: Stephen Rothwell
Date:   2024-08-29 08:32:49 +10:00
Commit: 30d8c4b01f
17 changed files with 187 additions and 127 deletions


@@ -1717,9 +1717,10 @@ The following nested keys are defined.
entries fault back in or are written out to disk.
memory.zswap.writeback
A read-write single value file. The default value is "1". The
initial value of the root cgroup is 1, and when a new cgroup is
created, it inherits the current value of its parent.
A read-write single value file. The default value is "1".
Note that this setting is hierarchical, i.e. the writeback would be
implicitly disabled for child cgroups if the upper hierarchy
does so.
When this is set to 0, all swapping attempts to swapping devices
are disabled. This includes both zswap writebacks, and swapping due
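
As a concrete illustration of the hierarchical semantics documented above, the sketch below disables zswap writeback for a parent cgroup from userspace, which then implicitly disables it for every descendant. This is only a minimal example: the cgroup v2 mount point and the "parent" cgroup path are assumptions, not part of the patch.

/*
 * Hedged userspace sketch (not kernel code): write "0" to a parent cgroup's
 * memory.zswap.writeback knob; per the documentation above, writeback is then
 * implicitly disabled for all child cgroups as well. The path assumes a
 * cgroup v2 hierarchy mounted at /sys/fs/cgroup with an existing "parent" group.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *knob = "/sys/fs/cgroup/parent/memory.zswap.writeback";
	int fd = open(knob, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}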


@@ -715,6 +715,33 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
brelse(bh);
}
/**
* nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
* @nilfs: nilfs object
*/
static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
{
struct nilfs_inode_info *ii, *n;
LIST_HEAD(head);
/* Abandon inodes that have read recovery data */
spin_lock(&nilfs->ns_inode_lock);
list_splice_init(&nilfs->ns_dirty_files, &head);
spin_unlock(&nilfs->ns_inode_lock);
if (list_empty(&head))
return;
set_nilfs_purging(nilfs);
list_for_each_entry_safe(ii, n, &head, i_dirty) {
spin_lock(&nilfs->ns_inode_lock);
list_del_init(&ii->i_dirty);
spin_unlock(&nilfs->ns_inode_lock);
iput(&ii->vfs_inode);
}
clear_nilfs_purging(nilfs);
}
/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
@@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
if (unlikely(err)) {
nilfs_err(sb, "error %d writing segment for recovery",
err);
goto failed;
goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
failed:
put_root:
nilfs_put_root(root);
return err;
failed:
nilfs_abort_roll_forward(nilfs);
goto put_root;
}
/**


@@ -1812,6 +1812,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
if (list_empty(&logs))
return; /* if the first segment buffer preparation failed */
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -2056,7 +2059,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
goto out;
goto failed;
/* Update time stamp */
sci->sc_seg_ctime = ktime_get_real_seconds();
@@ -2120,10 +2123,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
return err;
failed_to_write:
if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
nilfs_redirty_inodes(&sci->sc_dirty_files);
failed:
if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);


@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
u32 major = le32_to_cpu(sbp[0]->s_rev_level);
u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
struct nilfs_super_block *raw_sb;
u32 major;
u16 minor;
down_read(&nilfs->ns_sem);
raw_sb = nilfs->ns_sbp[0];
major = le32_to_cpu(raw_sb->s_rev_level);
minor = le16_to_cpu(raw_sb->s_minor_rev_level);
up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%d.%d\n", major, minor);
}
@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
struct nilfs_super_block *raw_sb;
u64 dev_size;
down_read(&nilfs->ns_sem);
raw_sb = nilfs->ns_sbp[0];
dev_size = le64_to_cpu(raw_sb->s_dev_size);
up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%llu\n", dev_size);
}
@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct nilfs_super_block *raw_sb;
ssize_t len;
return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid);
down_read(&nilfs->ns_sem);
raw_sb = nilfs->ns_sbp[0];
len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid);
up_read(&nilfs->ns_sem);
return len;
}
static
@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct nilfs_super_block *raw_sb;
ssize_t len;
return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
sbp[0]->s_volume_name);
down_read(&nilfs->ns_sem);
raw_sb = nilfs->ns_sbp[0];
len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n",
raw_sb->s_volume_name);
up_read(&nilfs->ns_sem);
return len;
}
static const char dev_readme_str[] =


@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
/* Don't forget to put previous bh! */
@@ -389,7 +388,8 @@ read_failure:
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
ocfs2_set_buffer_uptodate(ci, bh);
if (bh)
ocfs2_set_buffer_uptodate(ci, bh);
}
ocfs2_metadata_cache_io_unlock(ci);


@@ -752,7 +752,7 @@ static int kexec_calculate_store_digests(struct kimage *image)
#ifdef CONFIG_CRASH_HOTPLUG
/* Exclude elfcorehdr segment to allow future changes via hotplug */
if (j == image->elfcorehdr_index)
if (i == image->elfcorehdr_index)
continue;
#endif
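
The one-character fix above is easier to see in isolation: the outer loop index walks every kexec segment, while a second counter only advances for segments whose digest is actually stored, so the elfcorehdr exclusion has to compare the segment index, not the digest slot. The standalone sketch below (segment names, counts and the "digested" array are invented for illustration; this is not the kernel code) shows the corrected comparison.

#include <stdio.h>

#define NR_SEGMENTS 4

int main(void)
{
	/* i walks all segments; j counts only the segments that get digested. */
	const char *segments[NR_SEGMENTS] = { "kernel", "initrd", "elfcorehdr", "cmdline" };
	const char *digested[NR_SEGMENTS];
	int elfcorehdr_index = 2;
	int i, j = 0;

	for (i = 0; i < NR_SEGMENTS; i++) {
		if (i == elfcorehdr_index)	/* compare the segment index i, not j */
			continue;
		digested[j++] = segments[i];
	}

	for (i = 0; i < j; i++)
		printf("digest slot %d: %s\n", i, digested[i]);
	return 0;
}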


@@ -509,21 +509,17 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
/*
* Chunk size is the amount of work a helper does per call to the
* thread function. Load balance large jobs between threads by
* thread function. Load balance large jobs between threads by
* increasing the number of chunks, guarantee at least the minimum
* chunk size from the caller, and honor the caller's alignment.
* Ensure chunk_size is at least 1 to prevent divide-by-0
* panic in padata_mt_helper().
*/
ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
ps.chunk_size = max(ps.chunk_size, job->min_chunk);
ps.chunk_size = max(ps.chunk_size, 1ul);
ps.chunk_size = roundup(ps.chunk_size, job->align);
/*
* chunk_size can be 0 if the caller sets min_chunk to 0. So force it
* to at least 1 to prevent divide-by-0 panic in padata_mt_helper().
*/
if (!ps.chunk_size)
ps.chunk_size = 1U;
list_for_each_entry(pw, &works, pw_list)
if (job->numa_aware) {
int old_node = atomic_read(&last_used_nid);
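
The reordered clamping above can be checked with a small userspace calculation (the job values below are invented; this is not the padata code itself): even when the initial division and the caller's min_chunk both yield 0, forcing the value to at least 1 before the alignment round-up guarantees that the helper's later division by chunk_size cannot hit zero.

#include <stdio.h>

static unsigned long roundup_to(unsigned long x, unsigned long align)
{
	/* round x up to the next multiple of align (align > 0 assumed) */
	return ((x + align - 1) / align) * align;
}

int main(void)
{
	unsigned long size = 3, nworks = 8, load_balance_factor = 4;
	unsigned long min_chunk = 0, align = 16;
	unsigned long chunk_size;

	chunk_size = size / (nworks * load_balance_factor);		/* 0 */
	chunk_size = chunk_size > min_chunk ? chunk_size : min_chunk;	/* still 0 */
	chunk_size = chunk_size > 1UL ? chunk_size : 1UL;		/* forced to 1 */
	chunk_size = roundup_to(chunk_size, align);			/* 16 */

	printf("chunk_size = %lu\n", chunk_size);
	return 0;
}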


@@ -7566,14 +7566,14 @@ static void mt_validate_nulls(struct maple_tree *mt)
* 2. The gap is correctly set in the parents
*/
void mt_validate(struct maple_tree *mt)
__must_hold(mas->tree->ma_lock)
{
unsigned char end;
MA_STATE(mas, mt, 0, 0);
rcu_read_lock();
mas_start(&mas);
if (!mas_is_active(&mas))
goto done;
return;
while (!mte_is_leaf(mas.node))
mas_descend(&mas);
@@ -7594,9 +7594,6 @@ void mt_validate(struct maple_tree *mt)
mas_dfs_postorder(&mas, ULONG_MAX);
}
mt_validate_nulls(mt);
done:
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(mt_validate);


@@ -3613,8 +3613,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
memcg1_soft_limit_reset(memcg);
#ifdef CONFIG_ZSWAP
memcg->zswap_max = PAGE_COUNTER_MAX;
WRITE_ONCE(memcg->zswap_writeback,
!parent || READ_ONCE(parent->zswap_writeback));
WRITE_ONCE(memcg->zswap_writeback, true);
#endif
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
if (parent) {
@@ -5320,7 +5319,14 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
if (!zswap_is_enabled())
return true;
for (; memcg; memcg = parent_mem_cgroup(memcg))
if (!READ_ONCE(memcg->zswap_writeback))
return false;
return true;
}
static u64 zswap_current_read(struct cgroup_subsys_state *css,


@@ -1054,6 +1054,13 @@ __always_inline bool free_pages_prepare(struct page *page,
reset_page_owner(page, order);
page_table_check_free(page, order);
pgalloc_tag_sub(page, 1 << order);
/*
* The page is isolated and accounted for.
* Mark the codetag as empty to avoid accounting error
* when the page is freed by unpoison_memory().
*/
clear_page_tag_ref(page);
return false;
}


@@ -2116,6 +2116,10 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
if (!mem_alloc_profiling_enabled())
return;
/* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
return;
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return;


@@ -787,27 +787,30 @@ retry:
}
dst_pmdval = pmdp_get_lockless(dst_pmd);
/*
* If the dst_pmd is mapped as THP don't
* override it and just be strict.
*/
if (unlikely(pmd_trans_huge(dst_pmdval))) {
err = -EEXIST;
break;
}
if (unlikely(pmd_none(dst_pmdval)) &&
unlikely(__pte_alloc(dst_mm, dst_pmd))) {
err = -ENOMEM;
break;
}
/* If an huge pmd materialized from under us fail */
if (unlikely(pmd_trans_huge(*dst_pmd))) {
dst_pmdval = pmdp_get_lockless(dst_pmd);
/*
* If the dst_pmd is THP don't override it and just be strict.
* (This includes the case where the PMD used to be THP and
* changed back to none after __pte_alloc().)
*/
if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) ||
pmd_devmap(dst_pmdval))) {
err = -EEXIST;
break;
}
if (unlikely(pmd_bad(dst_pmdval))) {
err = -EFAULT;
break;
}
BUG_ON(pmd_none(*dst_pmd));
BUG_ON(pmd_trans_huge(*dst_pmd));
/*
* For shmem mappings, khugepaged is allowed to remove page
* tables under us; pte_offset_map_lock() will deal with that.
*/
err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
src_addr, flags, &folio);


@@ -2626,6 +2626,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
vb->dirty_max = 0;
bitmap_set(vb->used_map, 0, (1UL << order));
INIT_LIST_HEAD(&vb->free_list);
vb->cpu = raw_smp_processor_id();
xa = addr_to_vb_xa(va->va_start);
vb_idx = addr_to_vb_idx(va->va_start);
@@ -2642,7 +2643,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
* integrity together with list_for_each_rcu from read
* side.
*/
vb->cpu = raw_smp_processor_id();
vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);


@@ -1604,25 +1604,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
}
#ifdef CONFIG_CMA
/*
* It is waste of effort to scan and reclaim CMA pages if it is not available
* for current allocation context. Kswapd can not be enrolled as it can not
* distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
*/
static bool skip_cma(struct folio *folio, struct scan_control *sc)
{
return !current_is_kswapd() &&
gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
folio_migratetype(folio) == MIGRATE_CMA;
}
#else
static bool skip_cma(struct folio *folio, struct scan_control *sc)
{
return false;
}
#endif
/*
* Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
*
@@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
nr_pages = folio_nr_pages(folio);
total_scan += nr_pages;
if (folio_zonenum(folio) > sc->reclaim_idx ||
skip_cma(folio, sc)) {
if (folio_zonenum(folio) > sc->reclaim_idx) {
nr_skipped[folio_zonenum(folio)] += nr_pages;
move_to = &folios_skipped;
goto move;
@@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* ineligible */
if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
if (zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;


@@ -62,25 +62,57 @@ if [ "$GFPMASK" = "none" ]; then
fi
# Extract GFP flags from the kernel source
TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
grep -q ___GFP $SOURCE/include/linux/gfp_types.h
if [ $? -eq 0 ]; then
grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
else
grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
fi
TMPFILE=`mktemp -t gfptranslate-XXXXXX.c` || exit 1
# Parse the flags
IFS="
"
echo Source: $SOURCE
echo Parsing: $GFPMASK
for LINE in `cat $TMPFILE`; do
MASK=`echo $LINE | awk '{print $3}'`
if [ $(($GFPMASK&$MASK)) -ne 0 ]; then
echo $LINE
fi
done
rm -f $TMPFILE
(
cat <<EOF
#include <stdint.h>
#include <stdio.h>
// Try to fool compiler.h into not including extra stuff
#define __ASSEMBLY__ 1
#include <generated/autoconf.h>
#include <linux/gfp_types.h>
static const char *masks[] = {
EOF
sed -nEe 's/^[[:space:]]+(___GFP_.*)_BIT,.*$/\1/p' $SOURCE/include/linux/gfp_types.h |
while read b; do
cat <<EOF
#if defined($b) && ($b > 0)
[${b}_BIT] = "$b",
#endif
EOF
done
cat <<EOF
};
int main(int argc, char *argv[])
{
unsigned long long mask = $GFPMASK;
for (int i = 0; i < sizeof(mask) * 8; i++) {
unsigned long long bit = 1ULL << i;
if (mask & bit)
printf("\t%-25s0x%llx\n",
(i < ___GFP_LAST_BIT && masks[i]) ?
masks[i] : "*** INVALID ***",
bit);
}
return 0;
}
EOF
) > $TMPFILE
${CC:-gcc} -Wall -o ${TMPFILE}.bin -I $SOURCE/include $TMPFILE && ${TMPFILE}.bin
rm -f $TMPFILE ${TMPFILE}.bin
exit 0


@@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
return sret;
}
static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
unsigned long flags, unsigned long fd, unsigned long offset)
{
void *sret;
errno = 0;
sret = (void *) syscall(__NR_mmap, addr, len, prot,
flags, fd, offset);
return sret;
}
static int sys_munmap(void *ptr, size_t size)
{
int sret;
@@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut)
{
void *ptr;
ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
*ptrOut = ptr;
}
@@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut)
void *ptr;
unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
*ptrOut = ptr;
}
@@ -205,7 +194,7 @@ bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
@@ -481,8 +470,8 @@ static void test_seal_zero_address(void)
int prot;
/* use mmap to change protection. */
ptr = sys_mmap(0, size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
ptr = mmap(0, size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
FAIL_TEST_IF_FALSE(ptr == 0);
size = get_vma_size(ptr, &prot);
@@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal)
}
/* use mmap to change protection. */
ret2 = sys_mmap(ptr, size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
ret2 = mmap(ptr, size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal)
}
/* use mmap to expand. */
ret2 = sys_mmap(ptr, size, PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
ret2 = mmap(ptr, size, PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal)
}
/* use mmap to shrink. */
ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
ret2 = mmap(ptr, 8 * page_size, PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
ret = fallocate(fd, 0, 0, size);
FAIL_TEST_IF_FALSE(!ret);
ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
if (seal) {
@@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal)
int ret;
unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
if (seal) {


@@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len)
return sret;
}
static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
unsigned long flags, unsigned long fd, unsigned long offset)
{
void *sret;
errno = 0;
sret = (void *) syscall(__NR_mmap, addr, len, prot,
flags, fd, offset);
return sret;
}
static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
{
int sret;
@@ -56,7 +45,7 @@ static bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;