powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults
This option increases the number of hash misses by limiting the number of
kernel HPT entries, by keeping a per-CPU record of the last kernel HPTEs
installed, and removing that from the hash table on the next hash insertion.
A timer round-robins CPUs removing remaining kernel HPTEs and clearing the
TLB (in the case of bare metal) to increase and slightly randomise kernel
fault activity.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add comment about NR_CPUS usage, fixup whitespace]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com
commit 6b34a099fa
parent dfecd06bc5
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
@@ -1042,6 +1042,11 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	stress_hpt	[PPC]
+			Limits the number of kernel HPT entries in the hash
+			page table to increase the rate of hash page table
+			faults on kernel addresses.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.rst.
 
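Like the stress_slb option documented just above it, stress_hpt is a debugging and bringup aid rather than a tuning knob. As the parse_stress_hpt() hunk further below shows, the parameter takes no value, so booting with just `stress_hpt` on the kernel command line is enough to enable it on a hash-MMU system.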
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
@@ -118,6 +120,9 @@ repeat:
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 /*
  * Return true, if the entry has a slot value which
  * the software considers as invalid.
@@ -216,6 +218,9 @@ repeat:
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (stress_hpt())
+		hpt_do_stress(ea, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +332,12 @@ repeat:
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
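Note that in both the 4K and 64K base-page paths the hook runs only after a slot has been chosen and the hidx recorded in the PTE, i.e. once the new HPTE is actually installed, so the hpte_group handed to hpt_do_stress() always names a group containing a just-inserted translation; hpt_do_stress() itself then filters on ea >= PAGE_OFFSET so that only kernel addresses are recorded.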
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
 }
 early_param("disable_1tb_segments", parse_disable_1tb_segments);
 
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+	unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+	 * to allow practical forward progress. Bare metal returns 1, which
+	 * seems to help uncover more bugs.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return STRESS_MAX_GROUPS;
+	else
+		return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	pr_info("Partition table %p\n", partition_tb);
 }
 
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+	int next_cpu;
+
+	hpt_clear_stress();
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		tlbiel_all();
+
+	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(cpu_online_mask);
+	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+	add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
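The callback re-arms the timer on the next online CPU each tick, wrapping back to the first online CPU when cpumask_next() runs past nr_cpu_ids, so the 10ms sweep visits every online CPU in turn. A minimal userspace sketch of that wrap-around walk, with a plain array standing in for cpu_online_mask (all names below are hypothetical, not kernel API):

#include <stdio.h>

/* Stand-in for cpu_online_mask: IDs of the online CPUs. */
static const int online_cpus[] = { 0, 1, 4, 5 };
#define NR_ONLINE (int)(sizeof(online_cpus) / sizeof(online_cpus[0]))

/* Next online CPU after 'cpu', wrapping like the
 * cpumask_next()/cpumask_first() pair in the timer callback. */
static int next_online_cpu(int cpu)
{
	int i;

	for (i = 0; i < NR_ONLINE; i++) {
		if (online_cpus[i] > cpu)
			return online_cpus[i];	/* cpumask_next() found one */
	}
	return online_cpus[0];			/* wrapped: cpumask_first() */
}

int main(void)
{
	int cpu = 0;
	int step;

	/* Eight simulated 10ms "ticks": the work visits 0, 1, 4, 5, 0, 1, ... */
	for (step = 0; step < 8; step++) {
		printf("tick %d on cpu %d\n", step, cpu);
		cpu = next_online_cpu(cpu);
	}
	return 0;
}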
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
 	if (stress_slb_enabled)
 		static_branch_enable(&stress_slb_key);
 
+	if (stress_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&stress_hpt_key);
+		// Too early to use nr_cpu_ids, so use NR_CPUS
+		tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+						0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+		stress_hpt_struct = __va(tmp);
+
+		timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+		stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&stress_hpt_timer);
+	}
+
 	/*
 	 * Calculate the required size of the htab. We want the number of
 	 * PTEGs to equal one half the number of real pages.
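The memset() of 0xff is doing double duty here: it initialises every last_group slot of every CPU's entry to an all-ones bit pattern, which is exactly the -1UL "empty" sentinel that hpt_clear_stress() and hpt_do_stress() test against. A standalone check of that equivalence (a sketch; nothing here is kernel API):

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned long group[16];
	size_t i;

	/* Same trick as the memblock allocation path: an all-ones
	 * byte fill reads back as -1UL in every unsigned long slot. */
	memset(group, 0xff, sizeof(group));

	for (i = 0; i < sizeof(group) / sizeof(group[0]); i++)
		assert(group[i] == -1UL);

	printf("all %zu slots read back as -1UL\n",
	       sizeof(group) / sizeof(group[0]));
	return 0;
}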
@@ -1980,6 +2045,69 @@ repeat:
 	return slot;
 }
 
+void hpt_clear_stress(void)
+{
+	int cpu = raw_smp_processor_id();
+	int g;
+
+	for (g = 0; g < stress_nr_groups(); g++) {
+		unsigned long last_group;
+		last_group = stress_hpt_struct[cpu].last_group[g];
+
+		if (last_group != -1UL) {
+			int i;
+			for (i = 0; i < HPTES_PER_GROUP; i++) {
+				if (mmu_hash_ops.hpte_remove(last_group) == -1)
+					break;
+			}
+			stress_hpt_struct[cpu].last_group[g] = -1;
+		}
+	}
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+	if (hpte_group == last_group)
+		return;
+
+	if (last_group != -1UL) {
+		int i;
+		/*
+		 * Concurrent CPUs might be inserting into this group, so
+		 * give up after a number of iterations, to prevent a live
+		 * lock.
+		 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(last_group) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+	}
+
+	if (ea >= PAGE_OFFSET) {
+		/*
+		 * We would really like to prefetch to get the TLB loaded, then
+		 * remove the PTE before returning from fault interrupt, to
+		 * increase the hash fault rate.
+		 *
+		 * Unfortunately QEMU TCG does not model the TLB in a way that
+		 * makes this possible, and systemsim (mambo) emulator does not
+		 * bring in TLBs with prefetches (although loads/stores do
+		 * work for non-CI PTEs).
+		 *
+		 * So remember this PTE and clear it on the next hash fault.
+		 */
+		memmove(&stress_hpt_struct[cpu].last_group[1],
+			&stress_hpt_struct[cpu].last_group[0],
+			(stress_nr_groups() - 1) * sizeof(unsigned long));
+		stress_hpt_struct[cpu].last_group[0] = hpte_group;
+	}
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
 
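The two functions above implement a small per-CPU FIFO: slot 0 holds the most recently installed kernel HPTE group, the entry falling off the end (index stress_nr_groups() - 1) is evicted from the hash table, and a memmove() shifts the rest down one slot. The sketch below replays just that queue discipline in userspace; NR_GROUPS, evict_group() and remember_group() are hypothetical stand-ins for stress_nr_groups(), the mmu_hash_ops.hpte_remove() loop, and the tail of hpt_do_stress():

#include <stdio.h>
#include <string.h>

#define NR_GROUPS 4		/* stand-in for stress_nr_groups() */

static unsigned long last_group[NR_GROUPS];

/* Stand-in for the hpte_remove() loop that empties one hash group. */
static void evict_group(unsigned long group)
{
	printf("evicting group %#lx\n", group);
}

static void remember_group(unsigned long group)
{
	unsigned long oldest = last_group[NR_GROUPS - 1];

	if (group == oldest)	/* matches the slot we would evict */
		return;

	if (oldest != -1UL) {
		evict_group(oldest);
		last_group[NR_GROUPS - 1] = -1UL;
	}

	/* Shift everything down one slot and enqueue at the head,
	 * mirroring the memmove() in hpt_do_stress(). */
	memmove(&last_group[1], &last_group[0],
		(NR_GROUPS - 1) * sizeof(unsigned long));
	last_group[0] = group;
}

int main(void)
{
	unsigned long g;

	memset(last_group, 0xff, sizeof(last_group));	/* all slots empty */

	for (g = 0x100; g < 0x100 + 6 * 8; g += 8)
		remember_group(g);	/* groups 0x100 .. 0x128 */

	return 0;
}

Keeping more than one slot on LPAR matters because H_REMOVE also flushes the TLB: with a single-entry queue the just-installed translation would be torn down before it could be used, stalling forward progress, which is the situation the stress_nr_groups() comment describes.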
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
@@ -13,6 +13,17 @@ static inline bool stress_slb(void)
 	return static_branch_unlikely(&stress_slb_key);
 }
 
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+	return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
 void slb_setup_new_exec(void);
 
 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);