x86 MM handling code changes for v6.7:
 - Add new NX-stack self-test
 - Improve NUMA partial-CFMWS handling
 - Fix #VC handler bugs resulting in SEV-SNP boot failures
 - Drop the 4MB memory size restriction on minimal NUMA nodes
 - Reorganize headers a bit, in preparation for header dependency
   reduction efforts
 - Misc cleanups & fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>

-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmU9Ek4RHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1gIJQ/+Mg6mzMaThyNXqhJszeZJBmDaBv2sqjAB
5tcferg1nJBdNBzX8bJ95UFt9fIqeYAcgH00qlQCYSmyzbC1TQTk9U2Pre1zbOw4
042ONK8sygKSje1zdYleHoBeqwnxD2VNM0NwBElhGjumwHRng/tbLiI9wx6qiz+C
VsFXavkBszHGA1pjy9wZLGixYIH5jCygMpH134Wp+CIhpS+C4nftcGdIL1D5Oil1
6Tm2XeI6uyfiQhm9IOwDjfoYeC7gUjx1rp8rHseGUMJxyO/BX9q5j1ixbsVriqfW
97ucYuRL9mza7ic516C9v7OlAA3AGH2xWV+SYOGK88i9Co4kYzP4WnamxXqOsD8+
popxG55oa6QelhaouTBZvgERpZ4fWupSDs/UccsDaE9leMCerNEbGHEzt/Mm/2sw
xopjMQ0y5Kn6/fS0dLv8U+XHu4ANkvXJkFd6Ny0h/WfgGefuQOOTG9ruYgfeqqB8
dViQ4R7CO8ySjD45KawAZl/EqL86x1M/CI1nlt0YY4vNwUuOJbebL7Jn8w3Fjxm5
FVfUlDmcPdhZfL9Vnrsi6MIou1cU1yJPw4D6sXJ4sg4s7A4ebBcRRrjayVQ4msjv
Q7cvBOMnWEHhOV11pvP50FmQuj74XW3bUqiuWrnK1SypvnhHavF6kc1XYpBLs1xZ
y8nueJW2qPw=
=tT5F
-----END PGP SIGNATURE-----

Merge tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm handling updates from Ingo Molnar:

 - Add new NX-stack self-test

 - Improve NUMA partial-CFMWS handling

 - Fix #VC handler bugs resulting in SEV-SNP boot failures

 - Drop the 4MB memory size restriction on minimal NUMA nodes

 - Reorganize headers a bit, in preparation for header dependency
   reduction efforts

 - Misc cleanups & fixes

* tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size
  selftests/x86/lam: Zero out buffer for readlink()
  x86/sev: Drop unneeded #include
  x86/sev: Move sev_setup_arch() to mem_encrypt.c
  x86/tdx: Replace deprecated strncpy() with strtomem_pad()
  selftests/x86/mm: Add new test that userspace stack is in fact NX
  x86/sev: Make boot_ghcb_page[] static
  x86/boot: Move x86_cache_alignment initialization to correct spot
  x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach
  x86/sev-es: Allow copy_from_kernel_nofault() in earlier boot
  x86_64: Show CR4.PSE on auxiliaries like on BSP
  x86/iommu/docs: Update AMD IOMMU specification document URL
  x86/sev/docs: Update document URL in amd-memory-encryption.rst
  x86/mm: Move arch_memory_failure() and arch_is_platform_page() definitions from <asm/processor.h> to <asm/pgtable.h>
  ACPI/NUMA: Apply SRAT proximity domain to entire CFMWS window
  x86/numa: Introduce numa_fill_memblks()
commit f0d25b5d0f
@@ -130,4 +130,4 @@ SNP feature support.
 
 More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
 
-[1] https://www.amd.com/system/files/TechDocs/40332.pdf
+[1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf
@@ -5,7 +5,7 @@ x86 IOMMU Support
 
 The architecture specs can be obtained from the below locations.
 
 - Intel: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
-- AMD: https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf
+- AMD: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_3_07_PUB.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
@@ -25,7 +25,7 @@
 #include "error.h"
 #include "../msr.h"
 
-struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
 struct ghcb *boot_ghcb;
 
 /*
@@ -119,7 +119,7 @@ static void __noreturn tdx_panic(const char *msg)
 	} message;
 
 	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
-	strncpy(message.str, msg, 64);
+	strtomem_pad(message.str, msg, '\0');
 
 	args.r8 = message.r8;
 	args.r9 = message.r9;
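Note: strncpy() is deprecated here because its size argument is easy to get wrong and its padding behavior is subtle. The kernel's strtomem_pad() takes a fixed-size array destination (the size is derived from the array type at compile time), copies as much of the source string as fits without a NUL terminator, and fills the remainder with the pad byte. A minimal userspace sketch of those semantics; strtomem_pad_demo() is a hypothetical name, not the kernel API:

/*
 * Userspace sketch of strtomem_pad() semantics. The real kernel version is
 * a macro in <linux/string.h> that rejects plain pointers and derives
 * dest_len from the destination array's type at compile time.
 */
#include <stddef.h>
#include <string.h>

static void strtomem_pad_demo(char *dest, size_t dest_len,
                              const char *src, char pad)
{
        /* Copy at most dest_len bytes of the string, no NUL terminator... */
        size_t n = strnlen(src, dest_len);

        memcpy(dest, src, n);
        /* ...and pad the rest of the buffer, here with '\0'. */
        memset(dest + n, pad, dest_len - n);
}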
@@ -19,8 +19,10 @@
 
 #ifdef CONFIG_X86_MEM_ENCRYPT
 void __init mem_encrypt_init(void);
+void __init mem_encrypt_setup_arch(void);
 #else
 static inline void mem_encrypt_init(void) { }
+static inline void __init mem_encrypt_setup_arch(void) { }
 #endif
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -43,7 +45,6 @@ void __init sme_map_bootdata(char *real_mode_data);
 void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
-void __init sev_setup_arch(void);
 
 void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
@@ -73,7 +74,6 @@ static inline void __init sme_map_bootdata(char *real_mode_data) { }
 static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
-static inline void __init sev_setup_arch(void) { }
 
 static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
@@ -12,13 +12,6 @@
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
 
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 extern int numa_off;
 
 /*
@@ -1716,6 +1716,14 @@ static inline bool pud_user_accessible_page(pud_t pud)
 }
 #endif
 
+#ifdef CONFIG_X86_SGX
+int arch_memory_failure(unsigned long pfn, int flags);
+#define arch_memory_failure arch_memory_failure
+
+bool arch_is_platform_page(u64 paddr);
+#define arch_is_platform_page arch_is_platform_page
+#endif
+
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_H */
@@ -724,14 +724,6 @@ enum mds_mitigations {
 	MDS_MITIGATION_VMWERV,
 };
 
-#ifdef CONFIG_X86_SGX
-int arch_memory_failure(unsigned long pfn, int flags);
-#define arch_memory_failure arch_memory_failure
-
-bool arch_is_platform_page(u64 paddr);
-#define arch_is_platform_page arch_is_platform_page
-#endif
-
 extern bool gds_ucode_mitigated(void);
 
 #endif /* _ASM_X86_PROCESSOR_H */
@@ -37,6 +37,8 @@ extern int phys_to_target_node(phys_addr_t start);
 #define phys_to_target_node phys_to_target_node
 extern int memory_add_physaddr_to_nid(u64 start);
 #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+extern int numa_fill_memblks(u64 start, u64 end);
+#define numa_fill_memblks numa_fill_memblks
 #endif
 #endif /* __ASSEMBLY__ */
 
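Note: the #define numa_fill_memblks numa_fill_memblks self-definition above is the usual kernel idiom for marking that the arch supplies its own implementation, so a generic header can provide a fallback stub only when the macro is absent. A minimal sketch of the pattern with a hypothetical name:

/* arch header (hypothetical my_hook): real version exists, mark it */
int my_hook(void);
#define my_hook my_hook

/* generic header: supply a stub only if no arch version was declared */
#ifndef my_hook
static inline int my_hook(void) { return 0; }
#endif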
@@ -1115,18 +1115,34 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
+	bool vp_bits_from_cpuid = true;
 
-	if (c->extended_cpuid_level >= 0x80000008) {
+	if (!cpu_has(c, X86_FEATURE_CPUID) ||
+	    (c->extended_cpuid_level < 0x80000008))
+		vp_bits_from_cpuid = false;
+
+	if (vp_bits_from_cpuid) {
 		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
 
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
+	} else {
+		if (IS_ENABLED(CONFIG_X86_64)) {
+			c->x86_clflush_size = 64;
+			c->x86_phys_bits = 36;
+			c->x86_virt_bits = 48;
+		} else {
+			c->x86_clflush_size = 32;
+			c->x86_virt_bits = 32;
+			c->x86_phys_bits = 32;
+
+			if (cpu_has(c, X86_FEATURE_PAE) ||
+			    cpu_has(c, X86_FEATURE_PSE36))
+				c->x86_phys_bits = 36;
+		}
 	}
-#ifdef CONFIG_X86_32
-	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
-		c->x86_phys_bits = 36;
-#endif
 	c->x86_cache_bits = c->x86_phys_bits;
+	c->x86_cache_alignment = c->x86_clflush_size;
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
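Note: get_cpu_address_sizes() reads CPUID leaf 0x80000008, which reports the physical address width in EAX[7:0] and the linear (virtual) address width in EAX[15:8]. A small userspace sketch (GCC/Clang on x86, using the compiler-provided <cpuid.h>) that reads the same leaf:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the extended leaf is unsupported. */
        if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx)) {
                puts("CPUID leaf 0x80000008 not supported");
                return 1;
        }
        printf("physical address bits: %u\n", eax & 0xff);
        printf("virtual address bits:  %u\n", (eax >> 8) & 0xff);
        return 0;
}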
@@ -1580,17 +1596,6 @@ static void __init cpu_parse_early_param(void)
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_64
-	c->x86_clflush_size = 64;
-	c->x86_phys_bits = 36;
-	c->x86_virt_bits = 48;
-#else
-	c->x86_clflush_size = 32;
-	c->x86_phys_bits = 32;
-	c->x86_virt_bits = 32;
-#endif
-	c->x86_cache_alignment = c->x86_clflush_size;
-
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 	c->extended_cpuid_level = 0;
 
@@ -1602,7 +1607,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		cpu_detect(c);
 		get_cpu_vendor(c);
 		get_cpu_cap(c);
-		get_cpu_address_sizes(c);
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
 		cpu_parse_early_param();
 
@@ -1618,6 +1622,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_CPUID);
 	}
 
+	get_cpu_address_sizes(c);
+
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
 	cpu_set_bug_bits(c);
@@ -179,8 +179,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movl	$0, %ecx
 #endif
 
-	/* Enable PAE mode, PGE and LA57 */
-	orl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+	/* Enable PAE mode, PSE, PGE and LA57 */
+	orl	$(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx
 #ifdef CONFIG_X86_5LEVEL
 	testl	$1, __pgtable_l5_enabled(%rip)
 	jz	1f
@@ -1120,7 +1120,7 @@ void __init setup_arch(char **cmdline_p)
 	 * Needs to run after memblock setup because it needs the physical
 	 * memory size.
 	 */
-	sev_setup_arch();
+	mem_encrypt_setup_arch();
 
 	efi_fake_memmap();
 	efi_find_mirror();
@@ -9,12 +9,21 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 	unsigned long vaddr = (unsigned long)unsafe_src;
 
 	/*
-	 * Range covering the highest possible canonical userspace address
-	 * as well as non-canonical address range. For the canonical range
-	 * we also need to include the userspace guard page.
+	 * Do not allow userspace addresses. This disallows
+	 * normal userspace and the userspace guard page:
 	 */
-	return vaddr >= TASK_SIZE_MAX + PAGE_SIZE &&
-	       __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
+	if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
+		return false;
+
+	/*
+	 * Allow everything during early boot before 'x86_virt_bits'
+	 * is initialized. Needed for instruction decoding in early
+	 * exception handlers.
+	 */
+	if (!boot_cpu_data.x86_virt_bits)
+		return true;
+
+	return __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
 }
 #else
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
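Note: __is_canonical_address() tests whether sign-extending the address from bit x86_virt_bits - 1 leaves it unchanged, which is what "canonical" means on x86-64 (48 virtual bits with 4-level paging, 57 with LA57). A standalone sketch of that check, assuming the compiler implements right shift of signed values arithmetically (true for GCC/Clang):

#include <stdbool.h>
#include <stdint.h>

static bool is_canonical(uint64_t vaddr, unsigned int vaddr_bits)
{
        /* Canonical iff bits [63:vaddr_bits-1] all copy bit vaddr_bits-1. */
        unsigned int shift = 64 - vaddr_bits;

        return (uint64_t)((int64_t)(vaddr << shift) >> shift) == vaddr;
}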
@@ -12,6 +12,7 @@
 #include <linux/swiotlb.h>
 #include <linux/cc_platform.h>
 #include <linux/mem_encrypt.h>
+#include <linux/virtio_anchor.h>
 
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
@@ -86,3 +87,36 @@ void __init mem_encrypt_init(void)
 
 	print_mem_encrypt_feature_info();
 }
+
+void __init mem_encrypt_setup_arch(void)
+{
+	phys_addr_t total_mem = memblock_phys_mem_size();
+	unsigned long size;
+
+	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		return;
+
+	/*
+	 * For SEV and TDX, all DMA has to occur via shared/unencrypted pages.
+	 * Kernel uses SWIOTLB to make this happen without changing device
+	 * drivers. However, depending on the workload being run, the
+	 * default 64MB of SWIOTLB may not be enough and SWIOTLB may
+	 * run out of buffers for DMA, resulting in I/O errors and/or
+	 * performance degradation especially with high I/O workloads.
+	 *
+	 * Adjust the default size of SWIOTLB using a percentage of guest
+	 * memory for SWIOTLB buffers. Also, as the SWIOTLB bounce buffer
+	 * memory is allocated from low memory, ensure that the adjusted size
+	 * is within the limits of low available memory.
+	 *
+	 * The percentage of guest memory used here for SWIOTLB buffers
+	 * is more of an approximation of the static adjustment which
+	 * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6%
+	 */
+	size = total_mem * 6 / 100;
+	size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
+	swiotlb_adjust_size(size);
+
+	/* Set restricted memory access for virtio. */
+	virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+}
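Note: the sizing logic above takes 6% of guest memory and clamps it between the 64MB SWIOTLB default and 1GB. A standalone sketch that reproduces the arithmetic for a few guest sizes (assumes a 64-bit build; IO_TLB_DEFAULT_SIZE mirrors the kernel's 64MB default):

#include <stdio.h>

#define MB (1024UL * 1024)
#define GB (1024 * MB)
#define IO_TLB_DEFAULT_SIZE (64 * MB)

static unsigned long swiotlb_size(unsigned long total_mem)
{
        unsigned long size = total_mem * 6 / 100;

        if (size < IO_TLB_DEFAULT_SIZE)
                size = IO_TLB_DEFAULT_SIZE;     /* clamp_val() lower bound */
        if (size > 1 * GB)
                size = 1 * GB;                  /* clamp_val() upper bound */
        return size;
}

int main(void)
{
        printf("%lu MB\n", swiotlb_size(512 * MB) / MB); /* 64: below default */
        printf("%lu MB\n", swiotlb_size(4 * GB) / MB);   /* 245: ~6% of 4G */
        printf("%lu MB\n", swiotlb_size(64 * GB) / MB);  /* 1024: capped */
        return 0;
}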
@@ -19,8 +19,6 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/dma-mapping.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio_anchor.h>
 #include <linux/cc_platform.h>
 
 #include <asm/tlbflush.h>
@@ -215,40 +213,6 @@ void __init sme_map_bootdata(char *real_mode_data)
 	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
 }
 
-void __init sev_setup_arch(void)
-{
-	phys_addr_t total_mem = memblock_phys_mem_size();
-	unsigned long size;
-
-	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
-		return;
-
-	/*
-	 * For SEV, all DMA has to occur via shared/unencrypted pages.
-	 * SEV uses SWIOTLB to make this happen without changing device
-	 * drivers. However, depending on the workload being run, the
-	 * default 64MB of SWIOTLB may not be enough and SWIOTLB may
-	 * run out of buffers for DMA, resulting in I/O errors and/or
-	 * performance degradation especially with high I/O workloads.
-	 *
-	 * Adjust the default size of SWIOTLB for SEV guests using
-	 * a percentage of guest memory for SWIOTLB buffers.
-	 * Also, as the SWIOTLB bounce buffer memory is allocated
-	 * from low memory, ensure that the adjusted size is within
-	 * the limits of low available memory.
-	 *
-	 * The percentage of guest memory used here for SWIOTLB buffers
-	 * is more of an approximation of the static adjustment which
-	 * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6%
-	 */
-	size = total_mem * 6 / 100;
-	size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
-	swiotlb_adjust_size(size);
-
-	/* Set restricted memory access for virtio. */
-	virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
-}
-
 static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
 {
 	unsigned long pfn = 0;
@@ -12,6 +12,7 @@
 #include <linux/nodemask.h>
 #include <linux/sched.h>
 #include <linux/topology.h>
+#include <linux/sort.h>
 
 #include <asm/e820/api.h>
 #include <asm/proto.h>
@@ -602,13 +603,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 		if (start >= end)
 			continue;
 
-		/*
-		 * Don't confuse VM with a node that doesn't have the
-		 * minimum amount of memory:
-		 */
-		if (end && (end - start) < NODE_MIN_SIZE)
-			continue;
-
 		alloc_node_data(nid);
 	}
 
@@ -964,4 +958,83 @@ int memory_add_physaddr_to_nid(u64 start)
 	return nid;
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+
+static int __init cmp_memblk(const void *a, const void *b)
+{
+	const struct numa_memblk *ma = *(const struct numa_memblk **)a;
+	const struct numa_memblk *mb = *(const struct numa_memblk **)b;
+
+	return ma->start - mb->start;
+}
+
+static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
+
+/**
+ * numa_fill_memblks - Fill gaps in numa_meminfo memblks
+ * @start: address to begin fill
+ * @end: address to end fill
+ *
+ * Find and extend numa_meminfo memblks to cover the @start-@end
+ * physical address range, such that the first memblk includes
+ * @start, the last memblk includes @end, and any gaps in between
+ * are filled.
+ *
+ * RETURNS:
+ * 0		  : Success
+ * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ */
+
+int __init numa_fill_memblks(u64 start, u64 end)
+{
+	struct numa_memblk **blk = &numa_memblk_list[0];
+	struct numa_meminfo *mi = &numa_meminfo;
+	int count = 0;
+	u64 prev_end;
+
+	/*
+	 * Create a list of pointers to numa_meminfo memblks that
+	 * overlap start, end. Exclude (start == bi->end) since
+	 * end addresses in both a CFMWS range and a memblk range
+	 * are exclusive.
+	 *
+	 * This list of pointers is used to make in-place changes
+	 * that fill out the numa_meminfo memblks.
+	 */
+	for (int i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		if (start < bi->end && end >= bi->start) {
+			blk[count] = &mi->blk[i];
+			count++;
+		}
+	}
+	if (!count)
+		return NUMA_NO_MEMBLK;
+
+	/* Sort the list of pointers in memblk->start order */
+	sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
+
+	/* Make sure the first/last memblks include start/end */
+	blk[0]->start = min(blk[0]->start, start);
+	blk[count - 1]->end = max(blk[count - 1]->end, end);
+
+	/*
+	 * Fill any gaps by tracking the previous memblks
+	 * end address and backfilling to it if needed.
+	 */
+	prev_end = blk[0]->end;
+	for (int i = 1; i < count; i++) {
+		struct numa_memblk *curr = blk[i];
+
+		if (prev_end >= curr->start) {
+			if (prev_end < curr->end)
+				prev_end = curr->end;
+		} else {
+			curr->start = prev_end;
+			prev_end = curr->end;
+		}
+	}
+	return 0;
+}
+
 #endif
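Note: the core of numa_fill_memblks() is the final loop. With the overlapping memblks sorted by start address, it tracks the furthest end seen so far; when the next block starts beyond that point, it pulls the block's start back to close the gap. A standalone sketch of that walk with made-up addresses:

#include <stdio.h>

struct blk { unsigned long long start, end; };

int main(void)
{
        /* Sorted by start; [0x2000, 0x4000) is a gap to be filled. */
        struct blk b[] = { {0x1000, 0x2000}, {0x4000, 0x5000}, {0x4800, 0x6000} };
        unsigned long long prev_end = b[0].end;

        for (int i = 1; i < 3; i++) {
                if (prev_end >= b[i].start) {   /* overlap: track furthest end */
                        if (prev_end < b[i].end)
                                prev_end = b[i].end;
                } else {                        /* gap: backfill start */
                        b[i].start = prev_end;
                        prev_end = b[i].end;
                }
        }
        for (int i = 0; i < 3; i++)             /* b[1].start is now 0x2000 */
                printf("[%llx, %llx)\n", b[i].start, b[i].end);
        return 0;
}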
@@ -310,11 +310,16 @@ static int __init acpi_parse_cfmws(union acpi_subtable_headers *header,
 	start = cfmws->base_hpa;
 	end = cfmws->base_hpa + cfmws->window_size;
 
-	/* Skip if the SRAT already described the NUMA details for this HPA */
-	node = phys_to_target_node(start);
-	if (node != NUMA_NO_NODE)
+	/*
+	 * The SRAT may have already described NUMA details for all,
+	 * or a portion of, this CFMWS HPA range. Extend the memblks
+	 * found for any portion of the window to cover the entire
+	 * window.
+	 */
+	if (!numa_fill_memblks(start, end))
 		return 0;
 
+	/* No SRAT description. Create a new node. */
 	node = acpi_map_pxm_to_node(*fake_pxm);
 
 	if (node == NUMA_NO_NODE) {
@@ -12,6 +12,7 @@
 #define MAX_NUMNODES    (1 << NODES_SHIFT)
 
 #define	NUMA_NO_NODE	(-1)
+#define	NUMA_NO_MEMBLK	(-1)
 
 /* optionally keep NUMA memory info available post init */
 #ifdef CONFIG_NUMA_KEEP_MEMINFO
@@ -43,6 +44,12 @@ static inline int phys_to_target_node(u64 start)
 	return 0;
 }
 #endif
+#ifndef numa_fill_memblks
+static inline int __init numa_fill_memblks(u64 start, u64 end)
+{
+	return NUMA_NO_MEMBLK;
+}
+#endif
 #else /* !CONFIG_NUMA */
 static inline int numa_nearest_node(int node, unsigned int state)
 {
@@ -14,6 +14,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
 			check_initial_reg_state sigreturn iopl ioperm \
 			test_vsyscall mov_ss_trap \
 			syscall_arg_fault fsgsbase_restore sigaltstack
+TARGETS_C_BOTHBITS += nx_stack
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
@@ -109,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S
 # state.
 $(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
 $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+
+$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
+$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
@@ -573,7 +573,7 @@ int do_uring(unsigned long lam)
 	char path[PATH_MAX] = {0};
 
 	/* get current process path */
-	if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+	if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
 		return 1;
 
 	int file_fd = open(path, O_RDONLY);
@@ -680,14 +680,14 @@ static int handle_execve(struct testcases *test)
 		perror("Fork failed.");
 		ret = 1;
 	} else if (pid == 0) {
-		char path[PATH_MAX];
+		char path[PATH_MAX] = {0};
 
 		/* Set LAM mode in parent process */
 		if (set_lam(lam) != 0)
 			return 1;
 
 		/* Get current binary's path and the binary was run by execve */
-		if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
 			exit(-1);
 
 		/* run binary to get LAM mode and return to parent process */
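Note: the fix above matters because readlink() does not NUL-terminate its output. A zero-initialized buffer plus a PATH_MAX - 1 size cap guarantees at least one trailing '\0', so the result is always a valid C string. A minimal sketch of the same pattern:

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char path[PATH_MAX] = {0};      /* zero-fill: trailing bytes stay '\0' */
        ssize_t n = readlink("/proc/self/exe", path, PATH_MAX - 1);

        if (n <= 0)
                return 1;
        printf("%s\n", path);           /* safe: always NUL-terminated */
        return 0;
}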
tools/testing/selftests/x86/nx_stack.c (new file, 212 lines)
@@ -0,0 +1,212 @@
/*
 * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack
 * because I don't want to bother with PT_GNU_STACK detection.
 *
 * Fill the stack with INT3's and then try to execute some of them:
 * SIGSEGV -- good, SIGTRAP -- bad.
 *
 * Regular stack is completely overwritten before testing.
 * Test doesn't exit SIGSEGV handler after first fault at INT3.
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE
#undef NDEBUG
#include <assert.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>

#define PAGE_SIZE 4096

/*
 * This is memset(rsp, 0xcc, -1); but down.
 * It will SIGSEGV when bottom of the stack is reached.
 * Byte-size access is important! (see rdi tweak in the signal handler).
 */
void make_stack1(void);
asm(
".pushsection .text\n"
".globl make_stack1\n"
".align 16\n"
"make_stack1:\n"
	"mov $0xcc, %al\n"
#if defined __amd64__
	"mov %rsp, %rdi\n"
	"mov $-1, %rcx\n"
#elif defined __i386__
	"mov %esp, %edi\n"
	"mov $-1, %ecx\n"
#else
#error
#endif
	"std\n"
	"rep stosb\n"
	/* unreachable */
	"hlt\n"
".type make_stack1,@function\n"
".size make_stack1,.-make_stack1\n"
".popsection\n"
);

/*
 * memset(p, 0xcc, -1);
 * It will SIGSEGV when top of the stack is reached.
 */
void make_stack2(uint64_t p);
asm(
".pushsection .text\n"
".globl make_stack2\n"
".align 16\n"
"make_stack2:\n"
	"mov $0xcc, %al\n"
#if defined __amd64__
	"mov $-1, %rcx\n"
#elif defined __i386__
	"mov $-1, %ecx\n"
#else
#error
#endif
	"cld\n"
	"rep stosb\n"
	/* unreachable */
	"hlt\n"
".type make_stack2,@function\n"
".size make_stack2,.-make_stack2\n"
".popsection\n"
);

static volatile int test_state = 0;
static volatile unsigned long stack_min_addr;

#if defined __amd64__
#define RDI	REG_RDI
#define RIP	REG_RIP
#define RIP_STRING "rip"
#elif defined __i386__
#define RDI	REG_EDI
#define RIP	REG_EIP
#define RIP_STRING "eip"
#else
#error
#endif

static void sigsegv(int _, siginfo_t *__, void *uc_)
{
	/*
	 * Some Linux versions didn't clear DF before entering signal
	 * handler. make_stack1() doesn't have a chance to clear DF
	 * either so we clear it by hand here.
	 */
	asm volatile ("cld" ::: "memory");

	ucontext_t *uc = uc_;

	if (test_state == 0) {
		/* Stack is faulted and cleared from RSP to the lowest address. */
		stack_min_addr = ++uc->uc_mcontext.gregs[RDI];
		if (1) {
			printf("stack min %lx\n", stack_min_addr);
		}
		uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2;
		test_state = 1;
	} else if (test_state == 1) {
		/* Stack has been cleared from top to bottom. */
		unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI];
		if (1) {
			printf("stack max %lx\n", stack_max_addr);
		}
		/* Start faulting pages on stack and see what happens. */
		uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE;
		test_state = 2;
	} else if (test_state == 2) {
		/* Stack page is NX -- good, test next page. */
		uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE;
		if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) {
			/* One more SIGSEGV and test ends. */
			test_state = 3;
		}
	} else {
		printf("PASS\tAll stack pages are NX\n");
		_exit(EXIT_SUCCESS);
	}
}

static void sigtrap(int _, siginfo_t *__, void *uc_)
{
	const ucontext_t *uc = uc_;
	unsigned long rip = uc->uc_mcontext.gregs[RIP];
	printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip);
	_exit(EXIT_FAILURE);
}

int main(void)
{
	{
		struct sigaction act = {};
		sigemptyset(&act.sa_mask);
		act.sa_flags = SA_SIGINFO;
		act.sa_sigaction = &sigsegv;
		int rv = sigaction(SIGSEGV, &act, NULL);
		assert(rv == 0);
	}
	{
		struct sigaction act = {};
		sigemptyset(&act.sa_mask);
		act.sa_flags = SA_SIGINFO;
		act.sa_sigaction = &sigtrap;
		int rv = sigaction(SIGTRAP, &act, NULL);
		assert(rv == 0);
	}
	{
		struct rlimit rlim;
		int rv = getrlimit(RLIMIT_STACK, &rlim);
		assert(rv == 0);
		/* Cap stack at time-honored 8 MiB value. */
		rlim.rlim_max = rlim.rlim_cur;
		if (rlim.rlim_max > 8 * 1024 * 1024) {
			rlim.rlim_max = 8 * 1024 * 1024;
		}
		rv = setrlimit(RLIMIT_STACK, &rlim);
		assert(rv == 0);
	}
	{
		/*
		 * We don't know how much stack SIGSEGV handler uses.
		 * Bump this by 1 page every time someone complains,
		 * or rewrite it in assembly.
		 */
		const size_t len = SIGSTKSZ;
		void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		assert(p != MAP_FAILED);
		stack_t ss = {};
		ss.ss_sp = p;
		ss.ss_size = len;
		int rv = sigaltstack(&ss, NULL);
		assert(rv == 0);
	}
	make_stack1();
	/*
	 * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere.
	 * Fold it into main SIGTRAP pathway.
	 */
	__builtin_trap();
}