mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-01 10:13:58 +08:00
39b9552281
By this point we have functioning boot-time switching between 4- and 5-level paging mode. But naive approach comes with cost. Numbers below are for kernel build, allmodconfig, 5 times. CONFIG_X86_5LEVEL=n: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17308719.892691 task-clock:u (msec) # 26.772 CPUs utilized ( +- 0.11% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,993,164 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,614,978,867,455 cycles:u # 2.520 GHz ( +- 0.01% ) 39,371,534,575,126 stalled-cycles-frontend:u # 90.27% frontend cycles idle ( +- 0.09% ) 28,363,350,152,428 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,784,066,413 branches:u # 364.948 M/sec ( +- 0.00% ) 250,808,144,781 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 646.531974142 seconds time elapsed ( +- 1.15% ) CONFIG_X86_5LEVEL=y: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17411536.780625 task-clock:u (msec) # 26.426 CPUs utilized ( +- 0.10% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,868,663 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,865,909,056,301 cycles:u # 2.519 GHz ( +- 0.01% ) 39,740,130,365,581 stalled-cycles-frontend:u # 90.59% frontend cycles idle ( +- 0.05% ) 28,363,358,997,959 instructions:u # 0.65 insn per cycle # 1.40 stalled cycles per insn ( +- 0.00% ) 6,316,784,937,460 branches:u # 362.793 M/sec ( +- 0.00% ) 251,531,919,485 branch-misses:u # 3.98% of all branches ( +- 0.00% ) 658.886307752 seconds time elapsed ( +- 0.92% ) The patch tries to fix the performance regression by using cpu_feature_enabled(X86_FEATURE_LA57) instead of pgtable_l5_enabled in all hot code paths. These will statically patch the target code for additional performance. CONFIG_X86_5LEVEL=y + the patch: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17381990.268506 task-clock:u (msec) # 26.907 CPUs utilized ( +- 0.19% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,862,625 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,697,726,320,051 cycles:u # 2.514 GHz ( +- 0.03% ) 39,480,408,690,401 stalled-cycles-frontend:u # 90.35% frontend cycles idle ( +- 0.05% ) 28,363,394,221,388 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,794,985,573 branches:u # 363.410 M/sec ( +- 0.00% ) 251,013,232,547 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 645.991174661 seconds time elapsed ( +- 1.19% ) Unfortunately, this approach doesn't help with text size: vmlinux.before .text size: 8190319 vmlinux.after .text size: 8200623 The .text section is increased by about 4k. Not sure if we can do anything about this. Signed-off-by: Kirill A. Shuemov <kirill.shutemov@linux.intel.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bp@suse.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
120 lines
2.8 KiB
C
120 lines
2.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef BOOT_COMPRESSED_MISC_H
|
|
#define BOOT_COMPRESSED_MISC_H
|
|
|
|
/*
|
|
* Special hack: we have to be careful, because no indirections are allowed here,
|
|
* and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
|
|
* we just keep it from happening. (This list needs to be extended when new
|
|
* paravirt and debugging variants are added.)
|
|
*/
|
|
#undef CONFIG_PARAVIRT
|
|
#undef CONFIG_PARAVIRT_SPINLOCKS
|
|
#undef CONFIG_KASAN
|
|
|
|
#ifdef CONFIG_X86_5LEVEL
|
|
/* cpu_feature_enabled() cannot be used that early */
|
|
#define pgtable_l5_enabled __pgtable_l5_enabled
|
|
#endif
|
|
|
|
#include <linux/linkage.h>
|
|
#include <linux/screen_info.h>
|
|
#include <linux/elf.h>
|
|
#include <linux/io.h>
|
|
#include <asm/page.h>
|
|
#include <asm/boot.h>
|
|
#include <asm/bootparam.h>
|
|
#include <asm/bootparam_utils.h>
|
|
|
|
#define BOOT_BOOT_H
|
|
#include "../ctype.h"
|
|
|
|
#ifdef CONFIG_X86_64
|
|
#define memptr long
|
|
#else
|
|
#define memptr unsigned
|
|
#endif
|
|
|
|
/* misc.c */
|
|
extern memptr free_mem_ptr;
|
|
extern memptr free_mem_end_ptr;
|
|
extern struct boot_params *boot_params;
|
|
void __putstr(const char *s);
|
|
void __puthex(unsigned long value);
|
|
#define error_putstr(__x) __putstr(__x)
|
|
#define error_puthex(__x) __puthex(__x)
|
|
|
|
#ifdef CONFIG_X86_VERBOSE_BOOTUP
|
|
|
|
#define debug_putstr(__x) __putstr(__x)
|
|
#define debug_puthex(__x) __puthex(__x)
|
|
#define debug_putaddr(__x) { \
|
|
debug_putstr(#__x ": 0x"); \
|
|
debug_puthex((unsigned long)(__x)); \
|
|
debug_putstr("\n"); \
|
|
}
|
|
|
|
#else
|
|
|
|
static inline void debug_putstr(const char *s)
|
|
{ }
|
|
static inline void debug_puthex(const char *s)
|
|
{ }
|
|
#define debug_putaddr(x) /* */
|
|
|
|
#endif
|
|
|
|
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
|
|
/* cmdline.c */
|
|
int cmdline_find_option(const char *option, char *buffer, int bufsize);
|
|
int cmdline_find_option_bool(const char *option);
|
|
#endif
|
|
|
|
|
|
#if CONFIG_RANDOMIZE_BASE
|
|
/* kaslr.c */
|
|
void choose_random_location(unsigned long input,
|
|
unsigned long input_size,
|
|
unsigned long *output,
|
|
unsigned long output_size,
|
|
unsigned long *virt_addr);
|
|
/* cpuflags.c */
|
|
bool has_cpuflag(int flag);
|
|
#else
|
|
static inline void choose_random_location(unsigned long input,
|
|
unsigned long input_size,
|
|
unsigned long *output,
|
|
unsigned long output_size,
|
|
unsigned long *virt_addr)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_64
|
|
void initialize_identity_maps(void);
|
|
void add_identity_map(unsigned long start, unsigned long size);
|
|
void finalize_identity_maps(void);
|
|
extern unsigned char _pgtable[];
|
|
#else
|
|
static inline void initialize_identity_maps(void)
|
|
{ }
|
|
static inline void add_identity_map(unsigned long start, unsigned long size)
|
|
{ }
|
|
static inline void finalize_identity_maps(void)
|
|
{ }
|
|
#endif
|
|
|
|
#ifdef CONFIG_EARLY_PRINTK
|
|
/* early_serial_console.c */
|
|
extern int early_serial_base;
|
|
void console_init(void);
|
|
#else
|
|
static const int early_serial_base;
|
|
static inline void console_init(void)
|
|
{ }
|
|
#endif
|
|
|
|
unsigned long get_sev_encryption_mask(void);
|
|
|
|
#endif
|