Merge branch 'linus' into x86/urgent, to pick up dependencies for a fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit ba4e06d68e
@@ -0,0 +1,26 @@
Alpine MSIX controller

See arm,gic-v3.txt for SPI and MSI definitions.

Required properties:

- compatible: should be "al,alpine-msix"
- reg: physical base address and size of the registers
- interrupt-parent: specifies the parent interrupt controller.
- interrupt-controller: identifies the node as an interrupt controller
- msi-controller: identifies the node as a PCI Message Signaled Interrupt
  controller
- al,msi-base-spi: SPI base of the MSI frame
- al,msi-num-spis: number of SPIs assigned to the MSI frame, relative to SPI0

Example:

msix: msix {
    compatible = "al,alpine-msix";
    reg = <0x0 0xfbe00000 0x0 0x100000>;
    interrupt-parent = <&gic>;
    interrupt-controller;
    msi-controller;
    al,msi-base-spi = <160>;
    al,msi-num-spis = <160>;
};
@@ -16,6 +16,7 @@ Main node required properties:
    "arm,cortex-a15-gic"
    "arm,cortex-a7-gic"
    "arm,cortex-a9-gic"
    "arm,eb11mp-gic"
    "arm,gic-400"
    "arm,pl390"
    "arm,tc11mp-gic"
@@ -0,0 +1,44 @@
* Marvell ODMI for MSI support

Some Marvell SoCs have an On-Die Message Interrupt (ODMI) controller
which can be used by on-board peripherals for MSI interrupts.

Required properties:

- compatible           : The value here should contain:

  "marvell,ap806-odmi-controller", "marvell,odmi-controller".

- interrupt-controller : Identifies the node as an interrupt controller.

- msi-controller       : Identifies the node as an MSI controller.

- marvell,odmi-frames  : Number of ODMI frames available. Each frame
                         provides a number of events.

- reg                  : List of register definitions, one for each
                         ODMI frame.

- marvell,spi-base     : List of GIC base SPI interrupts, one for each
                         ODMI frame. Those SPI interrupts are 0-based,
                         i.e. marvell,spi-base = <128> will use SPI #96.
                         See Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
                         for details about the GIC Device Tree binding.

- interrupt-parent     : Reference to the parent interrupt controller.

Example:

    odmi: odmi@300000 {
        compatible = "marvell,ap806-odmi-controller",
                     "marvell,odmi-controller";
        interrupt-controller;
        msi-controller;
        marvell,odmi-frames = <4>;
        reg = <0x300000 0x4000>,
              <0x304000 0x4000>,
              <0x308000 0x4000>,
              <0x30C000 0x4000>;
        marvell,spi-base = <128>, <136>, <144>, <152>;
    };
@@ -23,6 +23,12 @@ Optional properties:
- mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors
  to which the GIC may not route interrupts. Valid values are 2 - 7.
  This property is ignored if the CPU is started in EIC mode.
- mti,reserved-ipi-vectors : Specifies the range of GIC interrupts that are
  reserved for IPIs.
  It accepts two values: the first is the starting interrupt and the second
  is the size of the reserved range.
  If not specified, the driver will allocate the last (2 * number of VPEs in
  the system) interrupts.

Required properties for timer sub-node:
- compatible : Should be "mti,gic-timer".
@@ -44,6 +50,7 @@ Example:
        #interrupt-cells = <3>;

        mti,reserved-cpu-vectors = <7>;
        mti,reserved-ipi-vectors = <40 8>;

        timer {
            compatible = "mti,gic-timer";
@@ -0,0 +1,49 @@
Sigma Designs SMP86xx/SMP87xx secondary interrupt controller

Required properties:
- compatible: should be "sigma,smp8642-intc"
- reg: physical address of MMIO region
- ranges: address space mapping of child nodes
- interrupt-parent: phandle of parent interrupt controller
- interrupt-controller: boolean
- #address-cells: should be <1>
- #size-cells: should be <1>

One child node per control block with properties:
- reg: address of registers for this control block
- interrupt-controller: boolean
- #interrupt-cells: should be <2>, interrupt index and flags per interrupts.txt
- interrupts: interrupt spec of primary interrupt controller

Example:

interrupt-controller@6e000 {
    compatible = "sigma,smp8642-intc";
    reg = <0x6e000 0x400>;
    ranges = <0x0 0x6e000 0x400>;
    interrupt-parent = <&gic>;
    interrupt-controller;
    #address-cells = <1>;
    #size-cells = <1>;

    irq0: interrupt-controller@0 {
        reg = <0x000 0x100>;
        interrupt-controller;
        #interrupt-cells = <2>;
        interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
    };

    irq1: interrupt-controller@100 {
        reg = <0x100 0x100>;
        interrupt-controller;
        #interrupt-cells = <2>;
        interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
    };

    irq2: interrupt-controller@300 {
        reg = <0x300 0x100>;
        interrupt-controller;
        #interrupt-cells = <2>;
        interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
    };
};
@@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

    clearcpuid=BITNUM [X86]
            Disable CPUID feature X for the kernel. See
            arch/x86/include/asm/cpufeature.h for the valid bit
            arch/x86/include/asm/cpufeatures.h for the valid bit
            numbers. Note the Linux specific bits are not necessarily
            stable over kernel options, but the vendor specific
            ones should be.
@@ -1687,6 +1687,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
    ip=             [IP_PNP]
            See Documentation/filesystems/nfs/nfsroot.txt.

    irqaffinity=    [SMP] Set the default irq affinity mask
            Format:
            <cpu number>,...,<cpu number>
            or
            <cpu number>-<cpu number>
            (must be a positive range in ascending order)
            or a mixture
            <cpu number>,...,<cpu number>-<cpu number>
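            For instance, a hypothetical command line mixing the
            list and range forms (illustrative only, not taken
            from this patch):
            irqaffinity=0,4-7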

    irqfixup        [HW]
            When an interrupt is not handled search all handlers
            for it. Intended to get systems with badly broken
@@ -2566,6 +2575,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

    nointroute      [IA-64]

    noinvpcid       [X86] Disable the INVPCID cpu feature.

    nojitter        [IA-64] Disables jitter checking for ITC timers.

    no-kvmclock     [X86,KVM] Disable paravirtualized KVM clock driver
@@ -277,13 +277,15 @@ int main(int argc, char *argv[])
               " %d external time stamp channels\n"
               " %d programmable periodic signals\n"
               " %d pulse per second\n"
               " %d programmable pins\n",
               " %d programmable pins\n"
               " %d cross timestamping\n",
               caps.max_adj,
               caps.n_alarm,
               caps.n_ext_ts,
               caps.n_per_out,
               caps.pps,
               caps.n_pins);
               caps.n_pins,
               caps.cross_timestamping);
        }
    }
@@ -40,3 +40,28 @@ cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin (or AuthenticAMD.bin)
find . | cpio -o -H newc >../ucode.cpio
cd ..
cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img

Builtin microcode
=================

We can also load builtin microcode supplied through the regular firmware
builtin method CONFIG_FIRMWARE_IN_KERNEL. Here's an example:

CONFIG_FIRMWARE_IN_KERNEL=y
CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"

This basically means you have the following tree structure locally:

/lib/firmware/
|-- amd-ucode
...
|   |-- microcode_amd_fam15h.bin
...
|-- intel-ucode
...
|   |-- 06-3a-09
...

so that the build system can find those files and integrate them into
the final kernel image. The early loader finds them and applies them.
@@ -60,6 +60,8 @@ Machine check
        threshold to 1. Enabling this may make memory predictive failure
        analysis less effective if the bios sets thresholds for memory
        errors since we will not see details for all errors.
   mce=recovery
        Force-enable recoverable machine check code paths

   nomce (for compatibility with i386): same as mce=off
@@ -2422,6 +2422,7 @@ F: arch/mips/bmips/*
F:  arch/mips/include/asm/mach-bmips/*
F:  arch/mips/kernel/*bmips*
F:  arch/mips/boot/dts/brcm/bcm*.dts*
F:  drivers/irqchip/irq-bcm63*
F:  drivers/irqchip/irq-bcm7*
F:  drivers/irqchip/irq-brcmstb*
F:  include/linux/bcm963xx_nvram.h
@@ -168,7 +168,7 @@ smp_callin(void)
          cpuid, current, current->active_mm));

    preempt_disable();
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */
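The same one-line change repeats across the architecture hunks above and below: each secondary CPU now enters the idle loop in the CPUHP_AP_ONLINE_IDLE hotplug state instead of jumping straight to CPUHP_ONLINE, letting the hotplug state machine finish the remaining bring-up steps from the idle thread. A minimal sketch of the shared pattern (a hypothetical composite function; each architecture's real setup code differs):

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/irqflags.h>
#include <linux/smp.h>

/* Hypothetical composite of the per-arch secondary-CPU tails patched
 * in this merge; only the final call is what each hunk changes. */
static void example_secondary_tail(void)
{
    set_cpu_online(smp_processor_id(), true);   /* announce the CPU */
    local_irq_enable();                         /* ready to take work */

    /* Was cpu_startup_entry(CPUHP_ONLINE): the CPU now parks in the
     * idle thread while the hotplug core walks the remaining AP
     * states before marking it fully online. */
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}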
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)

    local_irq_enable();
    preempt_disable();
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/*
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
    /*
     * OK, it's off to the idle thread for us
     */
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_cpus_done(unsigned int max_cpus)
@@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU
    depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
    select ARCH_SUPPORTS_BIG_ENDIAN
    select CLKSRC_MMIO
    select GENERIC_IRQ_CHIP
    select PINCTRL
    select PLAT_ORION
    select SOC_BUS
@@ -29,6 +28,7 @@ config MACH_ARMADA_370
    bool "Marvell Armada 370 boards"
    depends on ARCH_MULTI_V7
    select ARMADA_370_CLK
    select ARMADA_370_XP_IRQ
    select CPU_PJ4B
    select MACH_MVEBU_V7
    select PINCTRL_ARMADA_370
@@ -39,6 +39,7 @@ config MACH_ARMADA_370
config MACH_ARMADA_375
    bool "Marvell Armada 375 boards"
    depends on ARCH_MULTI_V7
    select ARMADA_370_XP_IRQ
    select ARM_ERRATA_720789
    select ARM_ERRATA_753970
    select ARM_GIC
@@ -58,6 +59,7 @@ config MACH_ARMADA_38X
    select ARM_ERRATA_720789
    select ARM_ERRATA_753970
    select ARM_GIC
    select ARMADA_370_XP_IRQ
    select ARMADA_38X_CLK
    select HAVE_ARM_SCU
    select HAVE_ARM_TWD if SMP
@@ -72,6 +74,7 @@ config MACH_ARMADA_39X
    bool "Marvell Armada 39x boards"
    depends on ARCH_MULTI_V7
    select ARM_GIC
    select ARMADA_370_XP_IRQ
    select ARMADA_39X_CLK
    select CACHE_L2X0
    select HAVE_ARM_SCU
@@ -86,6 +89,7 @@ config MACH_ARMADA_39X
config MACH_ARMADA_XP
    bool "Marvell Armada XP boards"
    depends on ARCH_MULTI_V7
    select ARMADA_370_XP_IRQ
    select ARMADA_XP_CLK
    select CPU_PJ4B
    select MACH_MVEBU_V7
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
    /*
     * OK, it's off to the idle thread for us
     */
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

#ifdef CONFIG_HOTPLUG_CPU
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)

    /* We are done with local CPU inits, unblock the boot CPU. */
    set_cpu_online(cpu, true);
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_prepare_boot_cpu(void)
@@ -180,7 +180,7 @@ void start_secondary(void)

    local_irq_enable();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
@@ -454,7 +454,7 @@ start_secondary (void *unused)
    preempt_disable();
    smp_callin();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
    return 0;
}
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
     */
    local_flush_tlb_all();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
    return 0;
}
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
    /*
     * OK, it's off to the idle thread for us
     */
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_cpus_done(unsigned int max_cpus)
@@ -151,6 +151,7 @@ config BMIPS_GENERIC
    select CSRC_R4K
    select SYNC_R4K
    select COMMON_CLK
    select BCM6345_L1_IRQ
    select BCM7038_L1_IRQ
    select BCM7120_L2_IRQ
    select BRCMSTB_L2_IRQ
@@ -2169,7 +2170,6 @@ config MIPS_MT_SMP
    select CPU_MIPSR2_IRQ_VI
    select CPU_MIPSR2_IRQ_EI
    select SYNC_R4K
    select MIPS_GIC_IPI if MIPS_GIC
    select MIPS_MT
    select SMP
    select SMP_UP
@@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT
config MIPS_CMP
    bool "MIPS CMP framework support (DEPRECATED)"
    depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
    select MIPS_GIC_IPI if MIPS_GIC
    select SMP
    select SYNC_R4K
    select SYS_SUPPORTS_SMP
@@ -2287,7 +2286,6 @@ config MIPS_CPS
    select MIPS_CM
    select MIPS_CPC
    select MIPS_CPS_PM if HOTPLUG_CPU
    select MIPS_GIC_IPI if MIPS_GIC
    select SMP
    select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
    select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2305,10 +2303,6 @@ config MIPS_CPS_PM
    select MIPS_CPC
    bool

config MIPS_GIC_IPI
    depends on MIPS_GIC
    bool

config MIPS_CM
    bool
@@ -26,90 +26,6 @@
#include "common.h"
#include "machtypes.h"

static void __init ath79_misc_intc_domain_init(
    struct device_node *node, int irq);

static void ath79_misc_irq_handler(struct irq_desc *desc)
{
    struct irq_domain *domain = irq_desc_get_handler_data(desc);
    void __iomem *base = domain->host_data;
    u32 pending;

    pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
              __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);

    if (!pending) {
        spurious_interrupt();
        return;
    }

    while (pending) {
        int bit = __ffs(pending);

        generic_handle_irq(irq_linear_revmap(domain, bit));
        pending &= ~BIT(bit);
    }
}

static void ar71xx_misc_irq_unmask(struct irq_data *d)
{
    void __iomem *base = irq_data_get_irq_chip_data(d);
    unsigned int irq = d->hwirq;
    u32 t;

    t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
    __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);

    /* flush write */
    __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
}

static void ar71xx_misc_irq_mask(struct irq_data *d)
{
    void __iomem *base = irq_data_get_irq_chip_data(d);
    unsigned int irq = d->hwirq;
    u32 t;

    t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
    __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);

    /* flush write */
    __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
}

static void ar724x_misc_irq_ack(struct irq_data *d)
{
    void __iomem *base = irq_data_get_irq_chip_data(d);
    unsigned int irq = d->hwirq;
    u32 t;

    t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
    __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);

    /* flush write */
    __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
}

static struct irq_chip ath79_misc_irq_chip = {
    .name = "MISC",
    .irq_unmask = ar71xx_misc_irq_unmask,
    .irq_mask = ar71xx_misc_irq_mask,
};

static void __init ath79_misc_irq_init(void)
{
    if (soc_is_ar71xx() || soc_is_ar913x())
        ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
    else if (soc_is_ar724x() ||
             soc_is_ar933x() ||
             soc_is_ar934x() ||
             soc_is_qca955x())
        ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
    else
        BUG();

    ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
}

static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
{
@@ -212,142 +128,12 @@ static void qca955x_irq_init(void)
    irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
}

/*
 * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
 * these devices typically allocate coherent DMA memory, however the
 * DMA controller may still have some unsynchronized data in the FIFO.
 * Issue a flush in the handlers to ensure that the driver sees
 * the update.
 *
 * This array maps the interrupt lines to the DDR write buffer channels.
 */

static unsigned irq_wb_chan[8] = {
    -1, -1, -1, -1, -1, -1, -1, -1,
};

asmlinkage void plat_irq_dispatch(void)
{
    unsigned long pending;
    int irq;

    pending = read_c0_status() & read_c0_cause() & ST0_IM;

    if (!pending) {
        spurious_interrupt();
        return;
    }

    pending >>= CAUSEB_IP;
    while (pending) {
        irq = fls(pending) - 1;
        if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
            ath79_ddr_wb_flush(irq_wb_chan[irq]);
        do_IRQ(MIPS_CPU_IRQ_BASE + irq);
        pending &= ~BIT(irq);
    }
}

static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
    irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
    irq_set_chip_data(irq, d->host_data);
    return 0;
}

static const struct irq_domain_ops misc_irq_domain_ops = {
    .xlate = irq_domain_xlate_onecell,
    .map = misc_map,
};

static void __init ath79_misc_intc_domain_init(
    struct device_node *node, int irq)
{
    void __iomem *base = ath79_reset_base;
    struct irq_domain *domain;

    domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
            ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
    if (!domain)
        panic("Failed to add MISC irqdomain");

    /* Disable and clear all interrupts */
    __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
    __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);

    irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
}

static int __init ath79_misc_intc_of_init(
    struct device_node *node, struct device_node *parent)
{
    int irq;

    irq = irq_of_parse_and_map(node, 0);
    if (!irq)
        panic("Failed to get MISC IRQ");

    ath79_misc_intc_domain_init(node, irq);
    return 0;
}

static int __init ar7100_misc_intc_of_init(
    struct device_node *node, struct device_node *parent)
{
    ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
    return ath79_misc_intc_of_init(node, parent);
}

IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
        ar7100_misc_intc_of_init);

static int __init ar7240_misc_intc_of_init(
    struct device_node *node, struct device_node *parent)
{
    ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
    return ath79_misc_intc_of_init(node, parent);
}

IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
        ar7240_misc_intc_of_init);

static int __init ar79_cpu_intc_of_init(
    struct device_node *node, struct device_node *parent)
{
    int err, i, count;

    /* Fill the irq_wb_chan table */
    count = of_count_phandle_with_args(
        node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");

    for (i = 0; i < count; i++) {
        struct of_phandle_args args;
        u32 irq = i;

        of_property_read_u32_index(
            node, "qca,ddr-wb-channel-interrupts", i, &irq);
        if (irq >= ARRAY_SIZE(irq_wb_chan))
            continue;

        err = of_parse_phandle_with_args(
            node, "qca,ddr-wb-channels",
            "#qca,ddr-wb-channel-cells",
            i, &args);
        if (err)
            return err;

        irq_wb_chan[irq] = args.args[0];
        pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
            irq, args.args[0]);
    }

    return mips_cpu_irq_of_init(node, parent);
}
IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
        ar79_cpu_intc_of_init);

void __init arch_init_irq(void)
{
    unsigned irq_wb_chan2 = -1;
    unsigned irq_wb_chan3 = -1;
    bool misc_is_ar71xx;

    if (mips_machtype == ATH79_MACH_GENERIC_OF) {
        irqchip_init();
        return;
@@ -355,14 +141,26 @@ void __init arch_init_irq(void)

    if (soc_is_ar71xx() || soc_is_ar724x() ||
        soc_is_ar913x() || soc_is_ar933x()) {
        irq_wb_chan[2] = 3;
        irq_wb_chan[3] = 2;
        irq_wb_chan2 = 3;
        irq_wb_chan3 = 2;
    } else if (soc_is_ar934x()) {
        irq_wb_chan[3] = 2;
        irq_wb_chan3 = 2;
    }

    mips_cpu_irq_init();
    ath79_misc_irq_init();
    ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);

    if (soc_is_ar71xx() || soc_is_ar913x())
        misc_is_ar71xx = true;
    else if (soc_is_ar724x() ||
             soc_is_ar933x() ||
             soc_is_ar934x() ||
             soc_is_qca955x())
        misc_is_ar71xx = false;
    else
        BUG();
    ath79_misc_irq_init(
        ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
        ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);

    if (soc_is_ar934x())
        ar934x_ip2_irq_init();
@@ -15,6 +15,12 @@
#include <asm/irq_cpu.h>
#include <asm/time.h>

static const struct of_device_id smp_intc_dt_match[] = {
    { .compatible = "brcm,bcm7038-l1-intc" },
    { .compatible = "brcm,bcm6345-l1-intc" },
    {}
};

unsigned int get_c0_compare_int(void)
{
    return CP0_LEGACY_COMPARE_IRQ;
@@ -24,8 +30,8 @@ void __init arch_init_irq(void)
{
    struct device_node *dn;

    /* Only the STB (bcm7038) controller supports SMP IRQ affinity */
    dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
    /* Only these controllers support SMP IRQ affinity */
    dn = of_find_matching_node(NULL, smp_intc_dt_match);
    if (dn)
        of_node_put(dn);
    else
@@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg)
void ath79_device_reset_set(u32 mask);
void ath79_device_reset_clear(u32 mask);

void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
void ath79_misc_irq_init(void __iomem *regs, int irq,
                         int irq_base, bool is_ar71xx);

#endif /* __ASM_MACH_ATH79_H */
@@ -44,8 +44,9 @@ static inline void plat_smp_setup(void)
    mp_ops->smp_setup();
}

extern void gic_send_ipi_single(int cpu, unsigned int action);
extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
                                   unsigned int action);

#else /* !CONFIG_SMP */
@@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
obj-$(CONFIG_MIPS_CMP)          += smp-cmp.o
obj-$(CONFIG_MIPS_CPS)          += smp-cps.o cps-vec.o
obj-$(CONFIG_MIPS_CPS_NS16550)  += cps-vec-ns16550.o
obj-$(CONFIG_MIPS_GIC_IPI)      += smp-gic.o
obj-$(CONFIG_MIPS_SPRAM)        += spram.o

obj-$(CONFIG_MIPS_VPE_LOADER)   += vpe.o
@@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus)
}

struct plat_smp_ops cmp_smp_ops = {
    .send_ipi_single    = gic_send_ipi_single,
    .send_ipi_mask      = gic_send_ipi_mask,
    .send_ipi_single    = mips_smp_send_ipi_single,
    .send_ipi_mask      = mips_smp_send_ipi_mask,
    .init_secondary     = cmp_init_secondary,
    .smp_finish         = cmp_smp_finish,
    .boot_secondary     = cmp_boot_secondary,
@@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = {
    .boot_secondary     = cps_boot_secondary,
    .init_secondary     = cps_init_secondary,
    .smp_finish         = cps_smp_finish,
    .send_ipi_single    = gic_send_ipi_single,
    .send_ipi_mask      = gic_send_ipi_mask,
    .send_ipi_single    = mips_smp_send_ipi_single,
    .send_ipi_mask      = mips_smp_send_ipi_mask,
#ifdef CONFIG_HOTPLUG_CPU
    .cpu_disable        = cps_cpu_disable,
    .cpu_die            = cps_cpu_die,
@@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)

#ifdef CONFIG_MIPS_GIC
    if (gic_present) {
        gic_send_ipi_single(cpu, action);
        mips_smp_send_ipi_single(cpu, action);
        return;
    }
#endif
@@ -33,12 +33,16 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/ftrace.h>
#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/of_irq.h>

#include <linux/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
#include <asm/idle.h>
#include <asm/r4k-timer.h>
#include <asm/mips-cpc.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
#include <asm/setup.h>
@@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map;

cpumask_t cpu_coherent_mask;

#ifdef CONFIG_GENERIC_IRQ_IPI
static struct irq_desc *call_desc;
static struct irq_desc *sched_desc;
#endif

static inline void set_cpu_sibling_map(int cpu)
{
    int i;
@@ -146,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops)
    mp_ops = ops;
}

#ifdef CONFIG_GENERIC_IRQ_IPI
void mips_smp_send_ipi_single(int cpu, unsigned int action)
{
    mips_smp_send_ipi_mask(cpumask_of(cpu), action);
}

void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
    unsigned long flags;
    unsigned int core;
    int cpu;

    local_irq_save(flags);

    switch (action) {
    case SMP_CALL_FUNCTION:
        __ipi_send_mask(call_desc, mask);
        break;

    case SMP_RESCHEDULE_YOURSELF:
        __ipi_send_mask(sched_desc, mask);
        break;

    default:
        BUG();
    }

    if (mips_cpc_present()) {
        for_each_cpu(cpu, mask) {
            core = cpu_data[cpu].core;

            if (core == current_cpu_data.core)
                continue;

            while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
                mips_cpc_lock_other(core);
                write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
                mips_cpc_unlock_other();
            }
        }
    }

    local_irq_restore(flags);
}

static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
{
    scheduler_ipi();

    return IRQ_HANDLED;
}

static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
{
    generic_smp_call_function_interrupt();

    return IRQ_HANDLED;
}

static struct irqaction irq_resched = {
    .handler = ipi_resched_interrupt,
    .flags = IRQF_PERCPU,
    .name = "IPI resched"
};

static struct irqaction irq_call = {
    .handler = ipi_call_interrupt,
    .flags = IRQF_PERCPU,
    .name = "IPI call"
};

static __init void smp_ipi_init_one(unsigned int virq,
                                    struct irqaction *action)
{
    int ret;

    irq_set_handler(virq, handle_percpu_irq);
    ret = setup_irq(virq, action);
    BUG_ON(ret);
}

static int __init mips_smp_ipi_init(void)
{
    unsigned int call_virq, sched_virq;
    struct irq_domain *ipidomain;
    struct device_node *node;

    node = of_irq_find_parent(of_root);
    ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);

    /*
     * Some platforms have half DT setup. So if we found irq node but
     * didn't find an ipidomain, try to search for one that is not in the
     * DT.
     */
    if (node && !ipidomain)
        ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);

    BUG_ON(!ipidomain);

    call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
    BUG_ON(!call_virq);

    sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
    BUG_ON(!sched_virq);

    if (irq_domain_is_ipi_per_cpu(ipidomain)) {
        int cpu;

        for_each_cpu(cpu, cpu_possible_mask) {
            smp_ipi_init_one(call_virq + cpu, &irq_call);
            smp_ipi_init_one(sched_virq + cpu, &irq_resched);
        }
    } else {
        smp_ipi_init_one(call_virq, &irq_call);
        smp_ipi_init_one(sched_virq, &irq_resched);
    }

    call_desc = irq_to_desc(call_virq);
    sched_desc = irq_to_desc(sched_virq);

    return 0;
}
early_initcall(mips_smp_ipi_init);
#endif

/*
 * First C code run on the secondary CPUs after being started up by
 * the master.
@@ -192,7 +328,7 @@ asmlinkage void start_secondary(void)
    WARN_ON_ONCE(!irqs_disabled());
    mp_ops->smp_finish();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

static void stop_this_cpu(void *dummy)
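As a usage sketch (hypothetical caller, not part of this patch): with the generic IPI plumbing above in place, any MIPS code holding a CPU number can kick that CPU through the reserved IPI IRQs, e.g.:

/* Hypothetical example: ask one CPU to reschedule via the generic
 * IPIs reserved by mips_smp_ipi_init() above. */
static void example_kick_cpu(int cpu)
{
    mips_smp_send_ipi_single(cpu, SMP_RESCHEDULE_YOURSELF);
}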
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
    init_clockevents();
#endif
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
    return 0;
}
@@ -305,7 +305,7 @@ void __init smp_callin(void)

    local_irq_enable();  /* Interrupts have been off until now */

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

    /* NOTREACHED */
    panic("smp_callin() AAAAaaaaahhhh....\n");
@@ -727,7 +727,7 @@ void start_secondary(void *unused)

    local_irq_enable();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

    BUG();
}
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
    set_cpu_online(smp_processor_id(), true);
    inc_irq_stat(CPU_RST);
    local_irq_enable();
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Upping and downing of CPUs */
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
    set_cpu_online(cpu, true);
    per_cpu(cpu_state, cpu) = CPU_ONLINE;

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

extern struct {
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
    local_irq_enable();

    wmb();
    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

    /* We should never reach here! */
    BUG();
@@ -134,7 +134,7 @@ void smp_callin(void)

    local_irq_enable();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void cpu_panic(void)
@@ -208,7 +208,7 @@ void online_secondary(void)
    /* Set up tile-timer clock-event device on this cpu */
    setup_tile_timer();

    cpu_startup_entry(CPUHP_ONLINE);
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -1163,22 +1163,23 @@ config MICROCODE
    bool "CPU microcode loading support"
    default y
    depends on CPU_SUP_AMD || CPU_SUP_INTEL
    depends on BLK_DEV_INITRD
    select FW_LOADER
    ---help---

      If you say Y here, you will be able to update the microcode on
      certain Intel and AMD processors. The Intel support is for the
      IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
      Xeon etc. The AMD support is for families 0x10 and later. You will
      obviously need the actual microcode binary data itself which is not
      shipped with the Linux kernel.
      Intel and AMD processors. The Intel support is for the IA32 family,
      e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
      AMD support is for families 0x10 and later. You will obviously need
      the actual microcode binary data itself which is not shipped with
      the Linux kernel.

      This option selects the general module only, you need to select
      at least one vendor specific module as well.
      The preferred method to load microcode from a detached initrd is described
      in Documentation/x86/early-microcode.txt. For that you need to enable
      CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
      initrd for microcode blobs.

      To compile this driver as a module, choose M here: the module
      will be called microcode.
      In addition, you can build-in the microcode into the kernel. For that you
      need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
      to the CONFIG_EXTRA_FIRMWARE config option.

config MICROCODE_INTEL
    bool "Intel microcode loading support"
@@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST

      If unsure say N here.

config X86_DEBUG_STATIC_CPU_HAS
    bool "Debug alternatives"
    depends on DEBUG_KERNEL
    ---help---
      This option causes additional code to be generated which
      fails if static_cpu_has() is used before alternatives have
      run.

      If unsure, say N.

config X86_DEBUG_FPU
    bool "Debug the x86 FPU code"
    depends on DEBUG_KERNEL
@@ -1,7 +1,7 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H

#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/processor-flags.h>

struct cpu_features {
@@ -17,7 +17,7 @@

#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
#include "../include/asm/cpufeature.h"
#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"

int main(void)
@@ -49,7 +49,6 @@ typedef unsigned int u32;

/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;

#define PECOFF_RELOC_RESERVE 0x20
@@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
@@ -33,7 +33,7 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>

#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
@@ -30,7 +30,7 @@
#include <linux/kernel.h>
#include <crypto/internal/hash.h>

#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>
@@ -30,7 +30,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>

asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
    .byte 0xf1
.endm

#else /* CONFIG_X86_64 */

/*
 * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
 * are different from the entry_32.S versions in not changing the segment
 * registers. So only suitable for in kernel use, not when transitioning
 * from or to user space. The resulting stack frame is not a standard
 * pt_regs frame. The main use case is calling C code from assembler
 * when all the registers need to be preserved.
 */

.macro SAVE_ALL
    pushl %eax
    pushl %ebp
    pushl %edi
    pushl %esi
    pushl %edx
    pushl %ecx
    pushl %ebx
.endm

.macro RESTORE_ALL
    popl %ebx
    popl %ecx
    popl %edx
    popl %esi
    popl %edi
    popl %ebp
    popl %eax
.endm

#endif /* CONFIG_X86_64 */

/*
@@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>
#include <asm/cpufeature.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
    CT_WARN_ON(ct_state() != CONTEXT_USER);
    user_exit();
}
#else
static inline void enter_from_user_mode(void) {}
#endif

static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)

    work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

#ifdef CONFIG_CONTEXT_TRACKING
    /*
     * If TIF_NOHZ is set, we are required to call user_exit() before
     * doing anything that could touch RCU.
     */
    if (work & _TIF_NOHZ) {
        enter_from_user_mode();
        work &= ~_TIF_NOHZ;
    }
#endif

#ifdef CONFIG_SECCOMP
    /*
     * Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
    if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
        BUG_ON(regs != task_pt_regs(current));

    /*
     * If we stepped into a sysenter/syscall insn, it trapped in
     * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
     * If user-mode had set TF itself, then it's still clear from
     * do_debug() and we need to set it again to restore the user
     * state. If we entered on the slow path, TF was already set.
     */
    if (work & _TIF_SINGLESTEP)
        regs->flags |= X86_EFLAGS_TF;

#ifdef CONFIG_SECCOMP
    /*
     * Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
    struct thread_info *ti = pt_regs_to_thread_info(regs);
    u32 cached_flags;

    if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)

    lockdep_sys_exit();

    cached_flags =
        READ_ONCE(pt_regs_to_thread_info(regs)->flags);
    cached_flags = READ_ONCE(ti->flags);

    if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
        exit_to_usermode_loop(regs, cached_flags);

#ifdef CONFIG_COMPAT
    /*
     * Compat syscalls set TS_COMPAT. Make sure we clear it before
     * returning to user mode. We need to clear it *after* signal
     * handling, because syscall restart has a fixup for compat
     * syscalls. The fixup is exercised by the ptrace_syscall_32
     * selftest.
     */
    ti->status &= ~TS_COMPAT;
#endif

    user_enter();
}

@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
    if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
        syscall_slow_exit_work(regs, cached_flags);

#ifdef CONFIG_COMPAT
    /*
     * Compat syscalls set TS_COMPAT. Make sure we clear it before
     * returning to user mode.
     */
    ti->status &= ~TS_COMPAT;
#endif

    local_irq_disable();
    prepare_exit_to_usermode(regs);
}

#ifdef CONFIG_X86_64
__visible void do_syscall_64(struct pt_regs *regs)
{
    struct thread_info *ti = pt_regs_to_thread_info(regs);
    unsigned long nr = regs->orig_ax;

    enter_from_user_mode();
    local_irq_enable();

    if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
        nr = syscall_trace_enter(regs);

    /*
     * NB: Native and x32 syscalls are dispatched from the same
     * table. The only functional difference is the x32 bit in
     * regs->orig_ax, which changes the behavior of some syscalls.
     */
    if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
        regs->ax = sys_call_table[nr & __SYSCALL_MASK](
            regs->di, regs->si, regs->dx,
            regs->r10, regs->r8, regs->r9);
    }

    syscall_return_slowpath(regs);
}
#endif

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
 * Does a 32-bit syscall. Called with IRQs on and does all entry and
 * exit work and returns with IRQs off. This function is extremely hot
 * in workloads that use it, and it's usually called from
 * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
 * all entry and exit work and returns with IRQs off. This function is
 * extremely hot in workloads that use it, and it's usually called from
 * do_fast_syscall_32, so forcibly inline it to improve performance.
 */
#ifdef CONFIG_X86_32
/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
__visible
#else
/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
static
#endif
__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
    struct thread_info *ti = pt_regs_to_thread_info(regs);
    unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
    syscall_return_slowpath(regs);
}

#ifdef CONFIG_X86_64
/* Handles INT80 on 64-bit kernels */
__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
    enter_from_user_mode();
    local_irq_enable();
    do_syscall_32_irqs_on(regs);
}
#endif

/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
     */
    regs->ip = landing_pad;

    /*
     * Fetch EBP from where the vDSO stashed it.
     *
     * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
     */
    enter_from_user_mode();

    local_irq_enable();

    /* Fetch EBP from where the vDSO stashed it. */
    if (
#ifdef CONFIG_X86_64
    /*
@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
        /* User code screwed up. */
        local_irq_disable();
        regs->ax = -EFAULT;
#ifdef CONFIG_CONTEXT_TRACKING
        enter_from_user_mode();
#endif
        prepare_exit_to_usermode(regs);
        return 0;  /* Keep it simple: use IRET. */
    }
@@ -40,7 +40,7 @@
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
@@ -287,20 +287,93 @@ need_resched:
END(resume_kernel)
#endif

# SYSENTER call handler stub
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!). To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
    addl $5*4, %esp    /* remove xen-provided frame */
    jmp sysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available. This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO. In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction. This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax      system call number
 * ebx      arg1
 * ecx      arg2
 * edx      arg3
 * esi      arg4
 * edi      arg5
 * ebp      user stack
 * 0(%ebp)  arg6
 */
ENTRY(entry_SYSENTER_32)
    movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
    pushl $__USER_DS            /* pt_regs->ss */
    pushl %ebp                  /* pt_regs->sp (stashed in bp) */
    pushfl                      /* pt_regs->flags (except IF = 0) */
    ASM_CLAC                    /* Clear AC after saving FLAGS */
    orl $X86_EFLAGS_IF, (%esp)  /* Fix IF */
    pushl $__USER_CS            /* pt_regs->cs */
    pushl $0                    /* pt_regs->ip = 0 (placeholder) */
    pushl %eax                  /* pt_regs->orig_ax */
    SAVE_ALL pt_regs_ax=$-ENOSYS  /* save rest */

    /*
     * SYSENTER doesn't filter flags, so we need to clear NT, AC
     * and TF ourselves. To save a few cycles, we can check whether
     * either was set instead of doing an unconditional popfq.
     * This needs to happen before enabling interrupts so that
     * we don't get preempted with NT set.
     *
     * If TF is set, we will single-step all the way to here -- do_debug
     * will ignore all the traps. (Yes, this is slow, but so is
     * single-stepping in general. This allows us to avoid having
     * a more complicated code to handle the case where a user program
     * forces us to single-step through the SYSENTER entry code.)
     *
     * NB.: .Lsysenter_fix_flags is a label with the code under it moved
     * out-of-line as an optimization: NT is unlikely to be set in the
     * majority of the cases and instead of polluting the I$ unnecessarily,
     * we're keeping that code behind a branch which will predict as
     * not-taken and therefore its instructions won't be fetched.
     */
    testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
    jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:

    /*
     * User mode is traced as though IRQs are on, and SYSENTER
     * turned them off.
@@ -326,6 +399,15 @@ sysenter_past_esp:
    popl %ebp                   /* pt_regs->bp */
    popl %eax                   /* pt_regs->ax */

    /*
     * Restore all flags except IF. (We restore IF separately because
     * STI gives a one-instruction window in which we won't be interrupted,
     * whereas POPF does not.)
     */
    addl $PT_EFLAGS-PT_DS, %esp  /* point esp at pt_regs->flags */
    btr $X86_EFLAGS_IF_BIT, (%esp)
    popfl

    /*
     * Return back to the vDSO, which will pop ecx and edx.
     * Don't bother with DS and ES (they already contain __USER_DS).
@@ -339,28 +421,63 @@ sysenter_past_esp:
    .popsection
    _ASM_EXTABLE(1b, 2b)
    PTGS_TO_GS_EX

.Lsysenter_fix_flags:
    pushl $X86_EFLAGS_FIXED
    popfl
    jmp .Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

# system call handler stub
/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction. INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform system calls.
 * Instances of INT $0x80 can be found inline in various programs and
 * libraries. It is also used by the vDSO's __kernel_vsyscall
 * fallback for hardware that doesn't support a faster entry method.
 * Restarted 32-bit system calls also fall back to INT $0x80
 * regardless of what instruction was originally used to do the system
 * call. (64-bit programs can use INT $0x80 as well, but they can
 * only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path. It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6
 */
ENTRY(entry_INT80_32)
    ASM_CLAC
    pushl %eax                  /* pt_regs->orig_ax */
    SAVE_ALL pt_regs_ax=$-ENOSYS  /* save rest */

    /*
     * User mode is traced as though IRQs are on. Unlike the 64-bit
     * case, INT80 is a trap gate on 32-bit kernels, so interrupts
     * are already on (unless user code is messing around with iopl).
     * User mode is traced as though IRQs are on, and the interrupt gate
     * turned them off.
     */
    TRACE_IRQS_OFF

    movl %esp, %eax
    call do_syscall_32_irqs_on
    call do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
    TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
    ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX

    movl PT_EFLAGS(%esp), %eax  # mix EFLAGS, SS and CS
    /*
     * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -387,19 +504,6 @@ ENTRY(iret_exc )

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
#ifdef CONFIG_PARAVIRT
    /*
     * The kernel can't run on a non-flat stack if paravirt mode
     * is active. Rather than try to fixup the high bits of
     * ESP, bypass this code entirely. This may break DOSemu
     * and/or Wine support in a paravirt VM, although the option
     * is still available to implement the setting of the high
     * 16-bits in the INTERRUPT_RETURN paravirt-op.
     */
    cmpl $0, pv_info+PARAVIRT_enabled
    jne restore_nocheck
#endif

    /*
     * Setup and switch to ESPFIX stack
     *
@@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
    addl $5*4, %esp    /* remove xen-provided frame */
    jmp sysenter_past_esp

ENTRY(xen_hypervisor_callback)
    pushl $-1          /* orig_ax = -1 => not a system call */
    SAVE_ALL
@@ -939,51 +1035,48 @@ error_code:
    jmp ret_from_exception
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
.macro FIX_STACK offset ok label
    cmpw $__KERNEL_CS, 4(%esp)
    jne \ok
\label:
    movl TSS_sysenter_sp0 + \offset(%esp), %esp
    pushfl
    pushl $__KERNEL_CS
    pushl $sysenter_past_esp
.endm

ENTRY(debug)
    /*
     * #DB can happen at the first instruction of
     * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
     * happens, then we will be running on a very small stack. We
     * need to detect this condition and switch to the thread
     * stack before calling any C code at all.
     *
     * If you edit this code, keep in mind that NMIs can happen in here.
     */
    ASM_CLAC
    cmpl $entry_SYSENTER_32, (%esp)
    jne debug_stack_correct
    FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
    pushl $-1        # mark this as an int
    SAVE_ALL
    TRACE_IRQS_OFF
    xorl %edx, %edx  # error code 0
    movl %esp, %eax  # pt_regs pointer

    /* Are we currently on the SYSENTER stack? */
    PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
    subl %eax, %ecx  /* ecx = (end of SYSENTER_stack) - esp */
    cmpl $SIZEOF_SYSENTER_stack, %ecx
    jb .Ldebug_from_sysenter_stack

    TRACE_IRQS_OFF
    call do_debug
    jmp ret_from_exception

.Ldebug_from_sysenter_stack:
    /* We're on the SYSENTER stack. Switch off. */
    movl %esp, %ebp
    movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
    TRACE_IRQS_OFF
    call do_debug
    movl %ebp, %esp
    jmp ret_from_exception
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 * NMI is doubly nasty. It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks. We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
    ASM_CLAC
@@ -994,41 +1087,32 @@ ENTRY(nmi)
    popl %eax
    je nmi_espfix_stack
#endif
    cmpl $entry_SYSENTER_32, (%esp)
    je nmi_stack_fixup
    pushl %eax
    movl %esp, %eax
    /*
     * Do not access memory above the end of our stack page,
     * it might not exist.
     */
    andl $(THREAD_SIZE-1), %eax
    cmpl $(THREAD_SIZE-20), %eax
    popl %eax
    jae nmi_stack_correct
    cmpl $entry_SYSENTER_32, 12(%esp)
    je nmi_debug_stack_check
nmi_stack_correct:
    pushl %eax

    pushl %eax       # pt_regs->orig_ax
    SAVE_ALL
    xorl %edx, %edx  # zero error code
    movl %esp, %eax  # pt_regs pointer

    /* Are we currently on the SYSENTER stack? */
    PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
    subl %eax, %ecx  /* ecx = (end of SYSENTER_stack) - esp */
    cmpl $SIZEOF_SYSENTER_stack, %ecx
    jb .Lnmi_from_sysenter_stack

    /* Not on SYSENTER stack. */
    call do_nmi
    jmp restore_all_notrace

nmi_stack_fixup:
    FIX_STACK 12, nmi_stack_correct, 1
    jmp nmi_stack_correct

nmi_debug_stack_check:
    cmpw $__KERNEL_CS, 16(%esp)
    jne nmi_stack_correct
    cmpl $debug, (%esp)
    jb nmi_stack_correct
    cmpl $debug_esp_fix_insn, (%esp)
    ja nmi_stack_correct
    FIX_STACK 24, nmi_stack_correct, 1
    jmp nmi_stack_correct
.Lnmi_from_sysenter_stack:
    /*
     * We're on the SYSENTER stack. Switch off. No one (not even debug)
     * is using the thread stack right now, so it's safe for us to use it.
     */
    movl %esp, %ebp
    movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
    call do_nmi
    movl %ebp, %esp
    jmp restore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
|
||||
/*
|
||||
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
|
||||
*
|
||||
* This is the only entry point used for 64-bit system calls. The
|
||||
* hardware interface is reasonably well designed and the register to
|
||||
* argument mapping Linux uses fits well with the registers that are
|
||||
* available when SYSCALL is used.
|
||||
*
|
||||
* SYSCALL instructions can be found inlined in libc implementations as
|
||||
* well as some other programs and libraries. There are also a handful
|
||||
* of SYSCALL instructions in the vDSO used, for example, as a
|
||||
* clock_gettimeofday fallback.
|
||||
*
|
||||
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
||||
* then loads new ss, cs, and rip from previously programmed MSRs.
|
||||
* rflags gets masked by a value from another MSR (so CLD and CLAC
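
To make the register convention described above concrete, here is a hedged userspace sketch of issuing a 64-bit SYSCALL directly; the clobber list is exactly the RCX/R11 behavior the comment documents (the wrapper name is made up):

/* Illustrative only: a raw 3-argument syscall on x86-64. %rax holds the
 * number, arguments go in %rdi/%rsi/%rdx (then %r10/%r8/%r9); SYSCALL
 * itself overwrites %rcx (return RIP) and %r11 (saved RFLAGS). */
static long raw_syscall3(long nr, long a1, long a2, long a3)
{
        long ret;
        asm volatile ("syscall"
                      : "=a" (ret)
                      : "a" (nr), "D" (a1), "S" (a2), "d" (a3)
                      : "rcx", "r11", "memory");
        return ret;
}

/* e.g. raw_syscall3(1, 1, (long)"hi\n", 3) invokes write(2) on Linux. */
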
@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
        movq %rsp, PER_CPU_VAR(rsp_scratch)
        movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

        TRACE_IRQS_OFF

        /* Construct struct pt_regs on stack */
        pushq $__USER_DS /* pt_regs->ss */
        pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
        /*
         * Re-enable interrupts.
         * We use 'rsp_scratch' as a scratch space, hence irq-off block above
         * must execute atomically in the face of possible interrupt-driven
         * task preemption. We must enable interrupts only after we're done
         * with using rsp_scratch:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq %r11 /* pt_regs->flags */
        pushq $__USER_CS /* pt_regs->cs */
        pushq %rcx /* pt_regs->ip */
@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
        pushq %r11 /* pt_regs->r11 */
        sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */

        testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz tracesys
        /*
         * If we need to do entry work or if we guess we'll need to do
         * exit work, go straight to the slow path.
         */
        testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz entry_SYSCALL64_slow_path
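
The combined test just above is the heart of the new fast-path gate: a single TIF-mask check covers both "entry work is needed now" and "exit work is likely later". A standalone sketch of the idea, with hypothetical flag values (the real masks live in the kernel's thread_info machinery):

#include <stdbool.h>
#include <stdint.h>

#define TIF_SYSCALL_TRACE (1u << 0)     /* hypothetical: entry work */
#define TIF_SIGPENDING    (1u << 1)     /* hypothetical: exit work */
#define TIF_NEED_RESCHED  (1u << 2)     /* hypothetical: exit work */

#define WORK_SYSCALL_ENTRY TIF_SYSCALL_TRACE
#define ALLWORK_MASK       (TIF_SIGPENDING | TIF_NEED_RESCHED)

static bool need_slow_path(uint32_t ti_flags)
{
        /* One test instead of several per-flag checks. */
        return (ti_flags & (WORK_SYSCALL_ENTRY | ALLWORK_MASK)) != 0;
}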

entry_SYSCALL_64_fastpath:
        /*
         * Easy case: enable interrupts and issue the syscall. If the syscall
         * needs pt_regs, we'll call a stub that disables interrupts again
         * and jumps to the slow path.
         */
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
#if __SYSCALL_MASK == ~0
        cmpq $__NR_syscall_max, %rax
#else
@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
#endif
        ja 1f /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10, %rcx

        /*
         * This call instruction is handled specially in stub_ptregs_64.
         * It might end up jumping to the slow path. If it jumps, RAX
         * and all argument registers are clobbered.
         */
        call *sys_call_table(, %rax, 8)
.Lentry_SYSCALL_64_after_fastpath_call:

        movq %rax, RAX(%rsp)
1:
        /*
         * Syscall return path ending with SYSRET (fast path).
         * Has incompletely filled pt_regs.
         */
        LOCKDEP_SYS_EXIT

        /*
         * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
         * it is too small to ever cause noticeable irq latency.
         * If we get here, then we know that pt_regs is clean for SYSRET64.
         * If we see that no exit work is required (which we are required
         * to check with IRQs off), then we can go straight to SYSRET64.
         */
        DISABLE_INTERRUPTS(CLBR_NONE)

        /*
         * We must check ti flags with interrupts (or at least preemption)
         * off because we must *never* return to userspace without
         * processing exit work that is enqueued if we're preempted here.
         * In particular, returning to userspace with any of the one-shot
         * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
         * very bad.
         */
        TRACE_IRQS_OFF
        testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
        jnz 1f

        RESTORE_C_REGS_EXCEPT_RCX_R11
        LOCKDEP_SYS_EXIT
        TRACE_IRQS_ON /* user mode is traced as IRQs on */
        movq RIP(%rsp), %rcx
        movq EFLAGS(%rsp), %r11
        RESTORE_C_REGS_EXCEPT_RCX_R11
        movq RSP(%rsp), %rsp
        /*
         * 64-bit SYSRET restores rip from rcx,
         * rflags from r11 (but RF and VM bits are forced to 0),
         * cs and ss are loaded from MSRs.
         * Restoration of rflags re-enables interrupts.
         *
         * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
         * descriptor is not reinitialized. This means that we should
         * avoid SYSRET with SS == NULL, which could happen if we schedule,
         * exit the kernel, and re-enter using an interrupt vector. (All
         * interrupt entries on x86_64 set SS to NULL.) We prevent that
         * from happening by reloading SS in __switch_to. (Actually
         * detecting the failure in 64-bit userspace is tricky but can be
         * done.)
         */
        USERGS_SYSRET64

GLOBAL(int_ret_from_sys_call_irqs_off)
1:
        /*
         * The fast path looked good when we started, but something changed
         * along the way and we need to switch to the slow path. Calling
         * raise(3) will trigger this, for example. IRQs are off.
         */
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        jmp int_ret_from_sys_call

/* Do syscall entry tracing */
tracesys:
        movq %rsp, %rdi
        movl $AUDIT_ARCH_X86_64, %esi
        call syscall_trace_enter_phase1
        test %rax, %rax
        jnz tracesys_phase2 /* if needed, run the slow path */
        RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
        movq ORIG_RAX(%rsp), %rax
        jmp entry_SYSCALL_64_fastpath /* and return to the fast path */

tracesys_phase2:
        SAVE_EXTRA_REGS
        movq %rsp, %rdi
        movl $AUDIT_ARCH_X86_64, %esi
        movq %rax, %rdx
        call syscall_trace_enter_phase2

        /*
         * Reload registers from stack in case ptrace changed them.
         * We don't reload %rax because syscall_trace_enter_phase2() returned
         * the value it wants us to use in the table lookup.
         */
        RESTORE_C_REGS_EXCEPT_RAX
        RESTORE_EXTRA_REGS
#if __SYSCALL_MASK == ~0
        cmpq $__NR_syscall_max, %rax
#else
        andl $__SYSCALL_MASK, %eax
        cmpl $__NR_syscall_max, %eax
#endif
        ja 1f /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10, %rcx /* fixup for C */
        call *sys_call_table(, %rax, 8)
        movq %rax, RAX(%rsp)
1:
        /* Use IRET because user could have changed pt_regs->foo */

/*
 * Syscall return path ending with IRET.
 * Has correct iret frame.
 */
GLOBAL(int_ret_from_sys_call)
        SAVE_EXTRA_REGS
        movq %rsp, %rdi
        call syscall_return_slowpath /* returns with IRQs disabled */
        jmp return_from_SYSCALL_64

entry_SYSCALL64_slow_path:
        /* IRQs are off. */
        SAVE_EXTRA_REGS
        movq %rsp, %rdi
        call do_syscall_64 /* returns with IRQs disabled */

return_from_SYSCALL_64:
        RESTORE_EXTRA_REGS
        TRACE_IRQS_IRETQ /* we're about to change IF */

@ -355,83 +324,45 @@ opportunistic_sysret_failed:
        jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)


.macro FORK_LIKE func
ENTRY(stub_\func)
        SAVE_EXTRA_REGS 8
        jmp sys_\func
END(stub_\func)
.endm

FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork

ENTRY(stub_execve)
        call sys_execve
return_from_execve:
        testl %eax, %eax
        jz 1f
        /* exec failed, can use fast SYSRET code path in this case */
        ret
1:
        /* must use IRET code path (pt_regs->cs may have changed) */
        addq $8, %rsp
        ZERO_EXTRA_REGS
        movq %rax, RAX(%rsp)
        jmp int_ret_from_sys_call
END(stub_execve)
/*
 * Remaining execve stubs are only 7 bytes long.
 * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
 */
.align 8
GLOBAL(stub_execveat)
        call sys_execveat
        jmp return_from_execve
END(stub_execveat)

#if defined(CONFIG_X86_X32_ABI)
.align 8
GLOBAL(stub_x32_execve)
        call compat_sys_execve
        jmp return_from_execve
END(stub_x32_execve)
.align 8
GLOBAL(stub_x32_execveat)
        call compat_sys_execveat
        jmp return_from_execve
END(stub_x32_execveat)
#endif

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
ENTRY(stub_ptregs_64)
        /*
         * SAVE_EXTRA_REGS result is not normally needed:
         * sigreturn overwrites all pt_regs->GPREGS.
         * But sigreturn can fail (!), and there is no easy way to detect that.
         * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
         * we SAVE_EXTRA_REGS here.
         * Syscalls marked as needing ptregs land here.
         * If we are on the fast path, we need to save the extra regs,
         * which we achieve by trying again on the slow path. If we are on
         * the slow path, the extra regs are already saved.
         *
         * RAX stores a pointer to the C function implementing the syscall.
         * IRQs are on.
         */
        SAVE_EXTRA_REGS 8
        call sys_rt_sigreturn
return_from_stub:
        addq $8, %rsp
        RESTORE_EXTRA_REGS
        movq %rax, RAX(%rsp)
        jmp int_ret_from_sys_call
END(stub_rt_sigreturn)
        cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
        jne 1f

#ifdef CONFIG_X86_X32_ABI
ENTRY(stub_x32_rt_sigreturn)
        SAVE_EXTRA_REGS 8
        call sys32_x32_rt_sigreturn
        jmp return_from_stub
END(stub_x32_rt_sigreturn)
#endif
        /*
         * Called from fast path -- disable IRQs again, pop return address
         * and jump to slow path
         */
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        popq %rax
        jmp entry_SYSCALL64_slow_path

1:
        /* Called from C */
        jmp *%rax /* called from C */
END(stub_ptregs_64)

.macro ptregs_stub func
ENTRY(ptregs_\func)
        leaq \func(%rip), %rax
        jmp stub_ptregs_64
END(ptregs_\func)
.endm

/* Instantiate ptregs_stub for each ptregs-using syscall */
#define __SYSCALL_64_QUAL_(sym)
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
#include <asm/syscalls_64.h>

/*
 * A newly forked process directly context switches into this address.
@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
 * rdi: prev task we switched from
 */
ENTRY(ret_from_fork)

        LOCK ; btr $TIF_FORK, TI_flags(%r8)

        pushq $0x0002
@ -447,28 +377,32 @@ ENTRY(ret_from_fork)

        call schedule_tail /* rdi: 'prev' task parameter */

        RESTORE_EXTRA_REGS

        testb $3, CS(%rsp) /* from kernel_thread? */
        jnz 1f

        /*
         * By the time we get here, we have no idea whether our pt_regs,
         * ti flags, and ti status came from the 64-bit SYSCALL fast path,
         * the slow path, or one of the 32-bit compat paths.
         * Use IRET code path to return, since it can safely handle
         * all of the above.
         * We came from kernel_thread. This code path is quite twisted, and
         * someone should clean it up.
         *
         * copy_thread_tls stashes the function pointer in RBX and the
         * parameter to be passed in RBP. The called function is permitted
         * to call do_execve and thereby jump to user mode.
         */
        jnz int_ret_from_sys_call

        /*
         * We came from kernel_thread
         * nb: we depend on RESTORE_EXTRA_REGS above
         */
        movq %rbp, %rdi
        call *%rbx
        movq RBP(%rsp), %rdi
        call *RBX(%rsp)
        movl $0, RAX(%rsp)
        RESTORE_EXTRA_REGS
        jmp int_ret_from_sys_call

        /*
         * Fall through as though we're exiting a syscall. This makes a
         * twisted sort of sense if we just called do_execve.
         */

1:
        movq %rsp, %rdi
        call syscall_return_slowpath /* returns with IRQs disabled */
        TRACE_IRQS_ON /* user mode is traced as IRQS on */
        SWAPGS
        jmp restore_regs_and_iret
END(ret_from_fork)

/*

@ -19,12 +19,21 @@
.section .entry.text, "ax"

/*
 * 32-bit SYSENTER instruction entry.
 * 32-bit SYSENTER entry.
 *
 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
 * IF and VM in rflags are cleared (IOW: interrupts are off).
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * on 64-bit kernels running on Intel CPUs.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO. In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction. This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old rip (!!!) and rflags.
 * and does not save old RIP (!!!), RSP, or RFLAGS.
 *
 * Arguments:
 * eax system call number
@ -35,10 +44,6 @@
 * edi arg5
 * ebp user stack
 * 0(%ebp) arg6
 *
 * This is purely a fast path. For anything complicated we use the int 0x80
 * path below. We set up a complete hardware stack frame to share code
 * with the int 0x80 path.
 */
ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
         */
        pushfq /* pt_regs->flags (except IF = 0) */
        orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
        ASM_CLAC /* Clear AC after saving FLAGS */

        pushq $__USER32_CS /* pt_regs->cs */
        xorq %r8,%r8
        pushq %r8 /* pt_regs->ip = 0 (placeholder) */
@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
        cld

        /*
         * Sysenter doesn't filter flags, so we need to clear NT
         * SYSENTER doesn't filter flags, so we need to clear NT and AC
         * ourselves. To save a few cycles, we can check whether
         * NT was set instead of doing an unconditional popfq.
         * either was set instead of doing an unconditional popfq.
         * This needs to happen before enabling interrupts so that
         * we don't get preempted with NT set.
         *
         * If TF is set, we will single-step all the way to here -- do_debug
         * will ignore all the traps. (Yes, this is slow, but so is
         * single-stepping in general. This allows us to avoid having
         * more complicated code to handle the case where a user program
         * forces us to single-step through the SYSENTER entry code.)
         *
         * NB.: .Lsysenter_fix_flags is a label with the code under it moved
         * out-of-line as an optimization: NT is unlikely to be set in the
         * majority of the cases and instead of polluting the I$ unnecessarily,
         * we're keeping that code behind a branch which will predict as
         * not-taken and therefore its instructions won't be fetched.
         */
        testl $X86_EFLAGS_NT, EFLAGS(%rsp)
        testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
        jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:

@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
        pushq $X86_EFLAGS_FIXED
        popfq
        jmp .Lsysenter_flags_fixed
GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)

/*
 * 32-bit SYSCALL instruction entry.
 * 32-bit SYSCALL entry.
 *
 * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
 * then loads new ss, cs, and rip from previously programmed MSRs.
 * rflags gets masked by a value from another MSR (so CLD and CLAC
 * are not needed). SYSCALL does not save anything on the stack
 * and does not change rsp.
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * on 64-bit kernels running on AMD CPUs.
 *
 * Note: rflags saving+masking-with-MSR happens only in Long mode
 * The SYSCALL instruction, in principle, should *only* occur in the
 * vDSO. In practice, it appears that this really is the case.
 * As evidence:
 *
 * - The calling convention for SYSCALL has changed several times without
 *   anyone noticing.
 *
 * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, any
 *   user task that did SYSCALL without immediately reloading SS
 *   would randomly crash.
 *
 * - Most programmers do not directly target AMD CPUs, and the 32-bit
 *   SYSCALL instruction does not exist on Intel CPUs. Even on AMD
 *   CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
 *   because the SYSCALL instruction in legacy/native 32-bit mode (as
 *   opposed to compat mode) is sufficiently poorly designed as to be
 *   essentially unusable.
 *
 * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
 * RFLAGS to R11, then loads new SS, CS, and RIP from previously
 * programmed MSRs. RFLAGS gets masked by a value from another MSR
 * (so CLD and CLAC are not needed). SYSCALL does not save anything on
 * the stack and does not change RSP.
 *
 * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
 * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
 *
@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)

/*
 * Emulated IA32 system calls via int 0x80.
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction. INT $0x80 lands here.
 *
 * This entry point can be used by 32-bit and 64-bit programs to perform
 * 32-bit system calls. Instances of INT $0x80 can be found inline in
 * various programs and libraries. It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a faster
 * entry method. Restarted 32-bit system calls also fall back to INT
 * $0x80 regardless of what instruction was originally used to do the
 * system call.
 *
 * This is considered a slow path. It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax system call number
@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
 * edx arg3
 * esi arg4
 * edi arg5
 * ebp arg6 (note: not saved in the stack frame, should not be touched)
 *
 * Notes:
 * Uses the same stack frame as the x86-64 version.
 * All registers except eax must be saved (but ptrace may violate that).
 * Arguments are zero extended. For system calls that want sign extension and
 * take long arguments a wrapper is needed. Most calls can just be called
 * directly.
 * Assumes it is only called from user space and entered with interrupts off.
 * ebp arg6
 */
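
As a concrete illustration of the argument convention just listed, a minimal inline-asm invocation of INT $0x80 (32-bit code only, e.g. built with gcc -m32; the wrapper name is invented):

/* eax = syscall number, ebx/ecx/edx = args 1-3; __NR_write is 4 in the
 * 32-bit ABI. Illustrative sketch, not kernel code. */
static long int80_write(int fd, const void *buf, unsigned long len)
{
        long ret;
        asm volatile ("int $0x80"
                      : "=a" (ret)
                      : "a" (4), "b" ((long)fd), "c" ((long)buf), "d" (len)
                      : "memory");
        return ret;
}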

ENTRY(entry_INT80_compat)
        /*
         * Interrupts are off on entry.
@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
        TRACE_IRQS_OFF

        movq %rsp, %rdi
        call do_syscall_32_irqs_off
        call do_int80_syscall_32
.Lsyscall_32_done:

        /* Go back to user mode. */

@ -6,17 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>

#ifdef CONFIG_IA32_EMULATION
#define SYM(sym, compat) compat
#else
#define SYM(sym, compat) sym
#endif

#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386

#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,

extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

@ -6,19 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>

#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym

#ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#else
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
#endif

#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64

#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),

extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
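
The __SYSCALL_64_QUAL_##qual pasting above selects between the plain symbol and its ptregs_-prefixed stub purely at preprocessing time. A self-contained userspace demo of the same pattern (all names hypothetical):

#include <stdio.h>

static long sys_read(void)                 { return 0; }
static long ptregs_sys_rt_sigreturn(void)  { return 1; }

#define QUAL_(sym)             sym              /* no qualifier */
#define QUAL_ptregs(sym)       ptregs_##sym     /* "/ptregs" qualifier */
#define SYSCALL(nr, sym, qual) [nr] = QUAL_##qual(sym),

static long (*table[16])(void) = {
        SYSCALL(0, sys_read, )
        SYSCALL(15, sys_rt_sigreturn, ptregs)
};

int main(void)
{
        printf("%d %d\n", table[0] == sys_read,
               table[15] == ptregs_sys_rt_sigreturn);
        return 0;
}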

@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
15 64 rt_sigreturn stub_rt_sigreturn
15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
56 common clone stub_clone
57 common fork stub_fork
58 common vfork stub_vfork
59 64 execve stub_execve
56 common clone sys_clone/ptregs
57 common fork sys_fork/ptregs
58 common vfork sys_vfork/ptregs
59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
172 common iopl sys_iopl
172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
322 64 execveat stub_execveat
322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn stub_x32_rt_sigreturn
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
520 x32 execve stub_x32_execve
520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat stub_x32_execveat
545 x32 execveat compat_sys_execveat/ptregs

@ -3,13 +3,63 @@
in="$1"
out="$2"

syscall_macro() {
        abi="$1"
        nr="$2"
        entry="$3"

        # Entry can be either just a function name or "function/qualifier"
        real_entry="${entry%%/*}"
        qualifier="${entry:${#real_entry}}"     # Strip the function name
        qualifier="${qualifier:1}"              # Strip the slash, if any

        echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
}

emit() {
        abi="$1"
        nr="$2"
        entry="$3"
        compat="$4"

        if [ "$abi" == "64" -a -n "$compat" ]; then
                echo "a compat entry for a 64-bit syscall makes no sense" >&2
                exit 1
        fi

        if [ -z "$compat" ]; then
                if [ -n "$entry" ]; then
                        syscall_macro "$abi" "$nr" "$entry"
                fi
        else
                echo "#ifdef CONFIG_X86_32"
                if [ -n "$entry" ]; then
                        syscall_macro "$abi" "$nr" "$entry"
                fi
                echo "#else"
                syscall_macro "$abi" "$nr" "$compat"
                echo "#endif"
        fi
}

grep '^[0-9]' "$in" | sort -n | (
        while read nr abi name entry compat; do
                abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
                if [ -n "$compat" ]; then
                        echo "__SYSCALL_${abi}($nr, $entry, $compat)"
                elif [ -n "$entry" ]; then
                        echo "__SYSCALL_${abi}($nr, $entry, $entry)"
                if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
                        # COMMON is the same as 64, except that we don't expect X32
                        # programs to use it. Our expectation has nothing to do with
                        # any generated code, so treat them the same.
                        emit 64 "$nr" "$entry" "$compat"
                elif [ "$abi" == "X32" ]; then
                        # X32 is equivalent to 64 on an X32-compatible kernel.
                        echo "#ifdef CONFIG_X86_X32_ABI"
                        emit 64 "$nr" "$entry" "$compat"
                        echo "#endif"
                elif [ "$abi" == "I386" ]; then
                        emit "$abi" "$nr" "$entry" "$compat"
                else
                        echo "Unknown abi $abi" >&2
                        exit 1
                fi
        done
) > "$out"
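
For reference, given table rows like "15 64 rt_sigreturn sys_rt_sigreturn/ptregs" and "513 x32 rt_sigreturn sys32_x32_rt_sigreturn" from the table above, the script would emit macro invocations of roughly this shape (reconstructed by hand, not captured output):

__SYSCALL_64(15, sys_rt_sigreturn, ptregs)
#ifdef CONFIG_X86_X32_ABI
__SYSCALL_64(513, sys32_x32_rt_sigreturn, )
#endif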

@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
        }
        fprintf(outfile, "\n};\n\n");

        fprintf(outfile, "static struct page *pages[%lu];\n\n",
                mapping_size / 4096);

        fprintf(outfile, "const struct vdso_image %s = {\n", name);
        fprintf(outfile, "\t.data = raw_data,\n");
        fprintf(outfile, "\t.size = %lu,\n", mapping_size);
        fprintf(outfile, "\t.text_mapping = {\n");
        fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
        fprintf(outfile, "\t\t.pages = pages,\n");
        fprintf(outfile, "\t},\n");
        if (alt_sec) {
                fprintf(outfile, "\t.alt = %lu,\n",
                        (unsigned long)GET_LE(&alt_sec->sh_offset));

@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/mm_types.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>

@ -3,7 +3,7 @@
 */

#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*

@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;

void __init init_vdso_image(const struct vdso_image *image)
{
        int i;
        int npages = (image->size) / PAGE_SIZE;

        BUG_ON(image->size % PAGE_SIZE != 0);
        for (i = 0; i < npages; i++)
                image->text_mapping.pages[i] =
                        virt_to_page(image->data + i*PAGE_SIZE);

        apply_alternatives((struct alt_instr *)(image->data + image->alt),
                           (struct alt_instr *)(image->data + image->alt +
@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}

static int vdso_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;

        if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
                return VM_FAULT_SIGBUS;

        vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
        get_page(vmf->page);
        return 0;
}

static const struct vm_special_mapping text_mapping = {
        .name = "[vdso]",
        .fault = vdso_fault,
};

static int vvar_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;
        long sym_offset;
        int ret = -EFAULT;

        if (!image)
                return VM_FAULT_SIGBUS;

        sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
                image->sym_vvar_start;

        /*
         * Sanity check: a symbol offset of zero means that the page
         * does not exist for this vdso image, not that the page is at
         * offset zero relative to the text mapping. This should be
         * impossible here, because sym_offset should only be zero for
         * the page past the end of the vvar mapping.
         */
        if (sym_offset == 0)
                return VM_FAULT_SIGBUS;

        if (sym_offset == image->sym_vvar_page) {
                ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
                                    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
        } else if (sym_offset == image->sym_hpet_page) {
#ifdef CONFIG_HPET_TIMER
                if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
                        ret = vm_insert_pfn_prot(
                                vma,
                                (unsigned long)vmf->virtual_address,
                                hpet_address >> PAGE_SHIFT,
                                pgprot_noncached(PAGE_READONLY));
                }
#endif
        } else if (sym_offset == image->sym_pvclock_page) {
                struct pvclock_vsyscall_time_info *pvti =
                        pvclock_pvti_cpu0_va();
                if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
                        ret = vm_insert_pfn(
                                vma,
                                (unsigned long)vmf->virtual_address,
                                __pa(pvti) >> PAGE_SHIFT);
                }
        }

        if (ret == 0 || ret == -EBUSY)
                return VM_FAULT_NOPAGE;

        return VM_FAULT_SIGBUS;
}
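
The two handlers above replace eagerly remapped pages with fault-time decisions. A trimmed in-kernel sketch of the general shape they follow (the demo names are invented; the struct, handler signature, and installer are the same kernel APIs this diff uses):

static int demo_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
{
        /* Decide per page, at first touch: insert a page/PFN or SIGBUS. */
        return VM_FAULT_SIGBUS;
}

static const struct vm_special_mapping demo_mapping = {
        .name = "[demo]",
        .fault = demo_fault,
};

/* Installed with _install_special_mapping(mm, addr, len, VM_READ|VM_MAYREAD,
 * &demo_mapping), just as map_vdso() below installs its two mappings. */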

static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;
        static struct page *no_pages[] = {NULL};
        static struct vm_special_mapping vvar_mapping = {
        static const struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
                .pages = no_pages,
                .fault = vvar_fault,
        };
        struct pvclock_vsyscall_time_info *pvti;

        if (calculate_addr) {
                addr = vdso_addr(current->mm->start_stack,
@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)

        text_start = addr - image->sym_vvar_start;
        current->mm->context.vdso = (void __user *)text_start;
        current->mm->context.vdso_image = image;

        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
                        image->size,
                        VM_READ|VM_EXEC|
                        VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                        &image->text_mapping);
                        &text_mapping);

        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
        vma = _install_special_mapping(mm,
                                       addr,
                                       -image->sym_vvar_start,
                                       VM_READ|VM_MAYREAD,
                                       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
                                       VM_PFNMAP,
                                       &vvar_mapping);

        if (IS_ERR(vma)) {
@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
                goto up_fail;
        }

        if (image->sym_vvar_page)
                ret = remap_pfn_range(vma,
                                      text_start + image->sym_vvar_page,
                                      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
                                      PAGE_SIZE,
                                      PAGE_READONLY);

        if (ret)
                goto up_fail;

#ifdef CONFIG_HPET_TIMER
        if (hpet_address && image->sym_hpet_page) {
                ret = io_remap_pfn_range(vma,
                                         text_start + image->sym_hpet_page,
                                         hpet_address >> PAGE_SHIFT,
                                         PAGE_SIZE,
                                         pgprot_noncached(PAGE_READONLY));

                if (ret)
                        goto up_fail;
        }
#endif

        pvti = pvclock_pvti_cpu0_va();
        if (pvti && image->sym_pvclock_page) {
                ret = remap_pfn_range(vma,
                                      text_start + image->sym_pvclock_page,
                                      __pa(pvti) >> PAGE_SHIFT,
                                      PAGE_SIZE,
                                      PAGE_READONLY);

                if (ret)
                        goto up_fail;
        }

up_fail:
        if (ret)
                current->mm->context.vdso = NULL;
@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
        node = cpu_to_node(cpu);
#endif
        if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
        if (static_cpu_has(X86_FEATURE_RDTSCP))
                write_rdtscp_aux((node << 12) | cpu);
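
write_rdtscp_aux() programs the TSC_AUX MSR with the (node << 12) | cpu layout, which RDTSCP later returns to userspace. A sketch of the matching decode, assuming that same 12-bit split:

#include <stdint.h>

static void decode_tsc_aux(uint32_t aux, unsigned int *cpu, unsigned int *node)
{
        *cpu  = aux & 0xfff;    /* low 12 bits: CPU number */
        *node = aux >> 12;      /* remaining bits: NUMA node */
}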

        /*

@ -16,6 +16,8 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>

int vclocks_used __read_mostly;

DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);

void update_vsyscall_tz(void)
@ -26,12 +28,17 @@ void update_vsyscall_tz(void)

void update_vsyscall(struct timekeeper *tk)
{
        int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
        struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;

        /* Mark the new vclock used. */
        BUILD_BUG_ON(VCLOCK_MAX >= 32);
        WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));

        gtod_write_begin(vdata);

        /* copy vsyscall data */
        vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
        vdata->vclock_mode = vclock_mode;
        vdata->cycle_last = tk->tkr_mono.cycle_last;
        vdata->mask = tk->tkr_mono.mask;
        vdata->mult = tk->tkr_mono.mult;
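
vclocks_used is the bitmask consulted by the vvar_fault() paths earlier in this diff through vclock_was_used(). A minimal sketch of that helper, assuming the bit-per-mode layout established above:

static inline bool vclock_was_used(int vclock)
{
        return vclocks_used & (1 << vclock);    /* set once a mode goes live */
}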

@ -151,12 +151,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
        ".popsection"

/*
 * This must be included *after* the definition of ALTERNATIVE due to
 * <asm/arch_hweight.h>
 */
#include <asm/cpufeature.h>

/*
 * Alternative instructions for different CPU types or capabilities.
 *

@ -6,7 +6,6 @@

#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>

@ -1,6 +1,8 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H

#include <asm/cpufeatures.h>

#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"

@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(long nr, volatile unsigned long *addr)
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
        asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
 * clear_bit() is atomic and implies release semantics before the memory
 * operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        barrier();
        clear_bit(nr, addr);
}

static inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
        asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        barrier();
        __clear_bit(nr, addr);
@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(long nr, volatile unsigned long *addr)
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
        asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(long nr, volatile unsigned long *addr)
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "xorb %1,%0"
@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
        GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 * If two examples of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
        int oldbit;

@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
        GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
        int oldbit;

@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}

/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
        int oldbit;

@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
        GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
        int oldbit;

@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
static __always_inline unsigned long __ffs(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
static __always_inline unsigned long ffz(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
static __always_inline unsigned long __fls(unsigned long word)
{
        asm("bsr %1,%0"
                : "=r" (word)
@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
static __always_inline int ffs(int x)
{
        int r;

@ -434,7 +434,7 @@ static inline int ffs(int x)
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
static __always_inline int fls(int x)
{
        int r;
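
The contracts documented in this hunk are easy to get wrong: __ffs()/__fls() are 0-based and undefined for zero input, while ffs()/fls() are 1-based and return 0 for zero. A small portable illustration using compiler builtins in place of the BSF/BSR asm:

#include <assert.h>

static unsigned long demo___ffs(unsigned long w)        /* 0-based, w != 0 */
{
        assert(w != 0);
        return (unsigned long)__builtin_ctzl(w);
}

static int demo_ffs(int x)                              /* 1-based, 0 -> 0 */
{
        return x ? __builtin_ctz((unsigned int)x) + 1 : 0;
}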
|
||||
|
||||
|
@ -3,10 +3,11 @@
|
||||
#ifndef _ASM_X86_CLOCKSOURCE_H
|
||||
#define _ASM_X86_CLOCKSOURCE_H
|
||||
|
||||
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
||||
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
|
||||
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
|
||||
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
||||
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
|
||||
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
|
||||
#define VCLOCK_MAX 3
|
||||
|
||||
struct arch_clocksource_data {
|
||||
int vclock_mode;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define ASM_X86_CMPXCHG_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
|
||||
|
||||
/*
|
||||
|
@ -1,288 +1,7 @@
|
||||
/*
|
||||
* Defines x86 CPU feature bits
|
||||
*/
|
||||
#ifndef _ASM_X86_CPUFEATURE_H
|
||||
#define _ASM_X86_CPUFEATURE_H
|
||||
|
||||
#ifndef _ASM_X86_REQUIRED_FEATURES_H
|
||||
#include <asm/required-features.h>
|
||||
#endif
|
||||
|
||||
#ifndef _ASM_X86_DISABLED_FEATURES_H
|
||||
#include <asm/disabled-features.h>
|
||||
#endif
|
||||
|
||||
#define NCAPINTS 16 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
* Note: If the comment begins with a quoted string, that string is used
|
||||
* in /proc/cpuinfo instead of the macro name. If the string is "",
|
||||
* this feature bit is not displayed in /proc/cpuinfo at all.
|
||||
*/
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
|
||||
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
|
||||
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
|
||||
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
|
||||
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
|
||||
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
|
||||
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
|
||||
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
|
||||
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
|
||||
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
|
||||
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
|
||||
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
|
||||
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
|
||||
/* (plus FCMOVcc, FCOMI with FPU) */
|
||||
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
|
||||
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
|
||||
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
|
||||
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
|
||||
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
|
||||
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
|
||||
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
|
||||
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
|
||||
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
|
||||
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
|
||||
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
|
||||
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
|
||||
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
|
||||
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
|
||||
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
|
||||
/* Don't duplicate feature flags which are redundant with Intel! */
|
||||
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
|
||||
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
|
||||
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
|
||||
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
|
||||
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
|
||||
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
|
||||
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
|
||||
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
|
||||
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
|
||||
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
|
||||
|
||||
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
|
||||
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
|
||||
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
|
||||
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
|
||||
|
||||
/* Other features, Linux-defined mapping, word 3 */
|
||||
/* This range is used for feature bits which conflict or are synthesized */
|
||||
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
|
||||
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
|
||||
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
|
||||
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||
/* cpu types for specific tunings: */
|
||||
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
|
||||
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
|
||||
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
|
||||
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
|
||||
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
|
||||
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
|
||||
/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
|
||||
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
|
||||
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
|
||||
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
|
||||
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */

/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */

/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */

/*
 * Auxiliary flags: Linux defined - For features scattered in various
 * CPUID levels like 0x6, 0xA etc, word 7.
 *
 * Reuse free bits when adding new feature flags!
 */

#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */

#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */

#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */


/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */

/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */

/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */

/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */

/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */

/*
 * BUG word(s)
 */
#define X86_BUG(x) (NCAPINTS*32 + (x))

#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
#include <asm/processor.h>

#if defined(__KERNEL__) && !defined(__ASSEMBLY__)

@@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 * is not relevant.
 */
#define cpu_feature_enabled(bit) \
	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
	 cpu_has(&boot_cpu_data, bit))
	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))

#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)

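Editor's note: the hunk above rewires cpu_feature_enabled() from the dynamic cpu_has() fallback to static_cpu_has(). A minimal usage sketch, assuming a made-up caller (the function and the pr_info are illustrative, not part of this commit):

/* Hypothetical caller: cpu_feature_enabled() folds to a compile-time 0
 * for features in DISABLED_MASK, and to a statically patched test
 * otherwise. */
#include <asm/cpufeature.h>
#include <linux/printk.h>

static void example_report_erms(void)
{
	if (cpu_feature_enabled(X86_FEATURE_ERMS))
		pr_info("ERMS: enhanced REP MOVSB/STOSB available\n");
}
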
@@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
/*
 * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
 * Do not add any more of those clumsy macros - use static_cpu_has() for
 * fast paths and boot_cpu_has() otherwise!
 */

#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
extern void warn_pre_alternatives(void);
extern bool __static_cpu_has_safe(u16 bit);

#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
 * Static testing of CPU features. Used the same as boot_cpu_has().
 * These are only valid after alternatives have run, but will statically
 * patch the target code for additional performance.
 * These will statically patch the target code for additional
 * performance.
 */
static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO

#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS

	/*
	 * Catch too early usage of this before alternatives
	 * have run.
	 */
	asm_volatile_goto("1: jmp %l[t_warn]\n"
		 "2:\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"
		 " .long 0\n"			/* no replacement */
		 " .word %P0\n"			/* 1: do replace */
		 " .byte 2b - 1b\n"		/* source len */
		 " .byte 0\n"			/* replacement len */
		 " .byte 0\n"			/* pad len */
		 ".previous\n"
		 /* skipping size check since replacement size = 0 */
		 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);

#endif

	asm_volatile_goto("1: jmp %l[t_no]\n"
		 "2:\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"
		 " .long 0\n"			/* no replacement */
		 " .word %P0\n"			/* feature bit */
		 " .byte 2b - 1b\n"		/* source len */
		 " .byte 0\n"			/* replacement len */
		 " .byte 0\n"			/* pad len */
		 ".previous\n"
		 /* skipping size check since replacement size = 0 */
		 : : "i" (bit) : : t_no);
	return true;
t_no:
	return false;

#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
t_warn:
	warn_pre_alternatives();
	return false;
#endif

#else /* CC_HAVE_ASM_GOTO */

	u8 flag;
	/* Open-coded due to __stringify() in ALTERNATIVE() */
	asm volatile("1: movb $0,%0\n"
		     "2:\n"
		     ".section .altinstructions,\"a\"\n"
		     " .long 1b - .\n"
		     " .long 3f - .\n"
		     " .word %P1\n"		/* feature bit */
		     " .byte 2b - 1b\n"		/* source len */
		     " .byte 4f - 3f\n"		/* replacement len */
		     " .byte 0\n"		/* pad len */
		     ".previous\n"
		     ".section .discard,\"aw\",@progbits\n"
		     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
		     ".previous\n"
		     ".section .altinstr_replacement,\"ax\"\n"
		     "3: movb $1,%0\n"
		     "4:\n"
		     ".previous\n"
		     : "=qm" (flag) : "i" (bit));
	return flag;

#endif /* CC_HAVE_ASM_GOTO */
}

#define static_cpu_has(bit) \
( \
	__builtin_constant_p(boot_cpu_has(bit)) ? \
		boot_cpu_has(bit) : \
	__builtin_constant_p(bit) ? \
		__static_cpu_has(bit) : \
		boot_cpu_has(bit) \
)

static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
	asm_volatile_goto("1: jmp %l[t_dynamic]\n"
	asm_volatile_goto("1: jmp 6f\n"
		 "2:\n"
		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
			 "((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
		 " .byte 0\n"			/* repl len */
		 " .byte 0\n"			/* pad len */
		 ".previous\n"
		 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
		 : : t_dynamic, t_no);
		 ".section .altinstr_aux,\"ax\"\n"
		 "6:\n"
		 " testb %[bitnum],%[cap_byte]\n"
		 " jnz %l[t_yes]\n"
		 " jmp %l[t_no]\n"
		 ".previous\n"
		 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
		     [bitnum] "i" (1 << (bit & 7)),
		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
		 : : t_yes, t_no);
t_yes:
	return true;
t_no:
	return false;
t_dynamic:
	return __static_cpu_has_safe(bit);
#else
	u8 flag;
	/* Open-coded due to __stringify() in ALTERNATIVE() */
	asm volatile("1: movb $2,%0\n"
		     "2:\n"
		     ".section .altinstructions,\"a\"\n"
		     " .long 1b - .\n"		/* src offset */
		     " .long 3f - .\n"		/* repl offset */
		     " .word %P2\n"		/* always replace */
		     " .byte 2b - 1b\n"		/* source len */
		     " .byte 4f - 3f\n"		/* replacement len */
		     " .byte 0\n"		/* pad len */
		     ".previous\n"
		     ".section .discard,\"aw\",@progbits\n"
		     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
		     ".previous\n"
		     ".section .altinstr_replacement,\"ax\"\n"
		     "3: movb $0,%0\n"
		     "4:\n"
		     ".previous\n"
		     ".section .altinstructions,\"a\"\n"
		     " .long 1b - .\n"		/* src offset */
		     " .long 5f - .\n"		/* repl offset */
		     " .word %P1\n"		/* feature bit */
		     " .byte 4b - 3b\n"		/* src len */
		     " .byte 6f - 5f\n"		/* repl len */
		     " .byte 0\n"		/* pad len */
		     ".previous\n"
		     ".section .discard,\"aw\",@progbits\n"
		     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
		     ".previous\n"
		     ".section .altinstr_replacement,\"ax\"\n"
		     "5: movb $1,%0\n"
		     "6:\n"
		     ".previous\n"
		     : "=qm" (flag)
		     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
	return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
#endif /* CC_HAVE_ASM_GOTO */
}

#define static_cpu_has_safe(bit) \
#define static_cpu_has(bit) \
( \
	__builtin_constant_p(boot_cpu_has(bit)) ? \
		boot_cpu_has(bit) : \
		_static_cpu_has_safe(bit) \
		_static_cpu_has(bit) \
)
#else
/*
 * gcc 3.x is too stupid to do the static test; fall back to dynamic.
 * Fall back to dynamic for gcc versions which don't support asm goto. Should be
 * a minority now anyway.
 */
#define static_cpu_has(bit) boot_cpu_has(bit)
#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif

#define cpu_has_bug(c, bit) cpu_has(c, (bit))
@@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))

#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))

#define MAX_CPU_FEATURES (NCAPINTS * 32)

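Editor's note: the asm-goto hunks above are easier to follow with a rough userspace model of the dynamic fallback that _static_cpu_has_safe() jumps to before alternatives have run. This is an assumption-laden sketch, not the kernel's code path; it mirrors the [bitnum]/[cap_byte] operands with the same byte-wise bitmap test:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for boot_cpu_data.x86_capability: NCAPINTS 32-bit words. */
static uint32_t x86_capability[16];

static bool dynamic_cpu_has(uint16_t bit)
{
	const uint8_t *caps = (const uint8_t *)x86_capability;

	/* Mirrors "testb %[bitnum],%[cap_byte]": byte bit>>3, mask 1<<(bit&7). */
	return caps[bit >> 3] & (uint8_t)(1 << (bit & 7));
}

int main(void)
{
	uint16_t bit = 6*32 + 19;		/* X86_FEATURE_NODEID_MSR */

	x86_capability[bit / 32] |= 1u << (bit % 32);
	printf("%d\n", dynamic_cpu_has(bit));	/* prints 1 on little-endian x86 */
	return 0;
}
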
arch/x86/include/asm/cpufeatures.h (new file, 300 lines)
@@ -0,0 +1,300 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H

#ifndef _ASM_X86_REQUIRED_FEATURES_H
#include <asm/required-features.h>
#endif

#ifndef _ASM_X86_DISABLED_FEATURES_H
#include <asm/disabled-features.h>
#endif

/*
 * Defines x86 CPU feature bits
 */
#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
 * Note: If the comment begins with a quoted string, that string is used
 * in /proc/cpuinfo instead of the macro name. If the string is "",
 * this feature bit is not displayed in /proc/cpuinfo at all.
 */

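Editor's note: as the comment above says, a flag lives at word*32 + bit across NCAPINTS words, and a quoted string in its comment overrides the /proc/cpuinfo name ("" hides the flag entirely). A tiny standalone illustration of the encoding (the chosen feature is just an example):

#include <stdio.h>

#define NCAPINTS 16	/* mirrors the define above */

int main(void)
{
	unsigned int avx = 4*32 + 28;	/* X86_FEATURE_AVX */

	printf("feature %u = word %u, bit %u (of %u words)\n",
	       avx, avx / 32, avx % 32, NCAPINTS);
	return 0;
}
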
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
/* (plus FCMOVcc, FCOMI with FPU) */
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */

/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */

/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */

/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */

/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */

/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */

/*
 * Auxiliary flags: Linux defined - For features scattered in various
 * CPUID levels like 0x6, 0xA etc, word 7.
 *
 * Reuse free bits when adding new feature flags!
 */

#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */

#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */

#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */


/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */

/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */

/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */

/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */

/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */

/*
 * BUG word(s)
 */
#define X86_BUG(x) (NCAPINTS*32 + (x))

#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */

#ifdef CONFIG_X86_32
/*
 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
 * to avoid confusion.
 */
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif

#endif /* _ASM_X86_CPUFEATURES_H */
@@ -98,4 +98,27 @@ struct desc_ptr {

#endif /* !__ASSEMBLY__ */

/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
#define AR_TYPE_RWDATA (1 * (1 << 9))
#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
#define AR_TYPE_XOCODE (4 * (1 << 9))
#define AR_TYPE_XRCODE (5 * (1 << 9))
#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
#define AR_TYPE_MASK (7 * (1 << 9))

#define AR_DPL0 (0 * (1 << 13))
#define AR_DPL3 (3 * (1 << 13))
#define AR_DPL_MASK (3 * (1 << 13))

#define AR_A (1 << 8) /* "Accessed" */
#define AR_S (1 << 12) /* If clear, "System" segment */
#define AR_P (1 << 15) /* "Present" */
#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
#define AR_L (1 << 21) /* "Long mode" for code segments */
#define AR_DB (1 << 22) /* D/B, effect depends on type */
#define AR_G (1 << 23) /* "Granularity" (limit in pages) */

#endif /* _ASM_X86_DESC_DEFS_H */

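Editor's note: the AR_* values above decode the access-rights word that LAR returns. A hedged helper sketch (the function is hypothetical; only the masks come from the hunk) — segment types 4..7 are code, so a present, non-system code descriptor can be classified like this:

/* Hypothetical decode helper, not part of this commit. */
static inline bool ar_is_usable_code(unsigned int ar)
{
	return (ar & AR_P) && (ar & AR_S) &&
	       (ar & AR_TYPE_MASK) >= AR_TYPE_XOCODE;
}
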
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap
#define dmi_remap ioremap
#define dmi_remap ioremap_cache
#define dmi_unmap iounmap

#endif /* _ASM_X86_DMI_H */

@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;

extern pte_t *kmap_pte;
extern pgprot_t kmap_prot;
#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);

@@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>

/*
 * High level FPU state handling functions:
@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 */
static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
	return static_cpu_has(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
	return static_cpu_has_safe(X86_FEATURE_XSAVE);
	return static_cpu_has(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
	return static_cpu_has_safe(X86_FEATURE_FXSR);
	return static_cpu_has(X86_FEATURE_FXSR);
}

/*
@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)

	WARN_ON(system_state != SYSTEM_BOOTING);

	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
	if (static_cpu_has(X86_FEATURE_XSAVES))
		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)

	WARN_ON(system_state != SYSTEM_BOOTING);

	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
	if (static_cpu_has(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
	 * pending. Clear the x87 state here by setting it to fixed values.
	 * "m" is a random variable that should be in L1.
	 */
	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
		asm volatile(
			"fnclex\n\t"
			"emms\n\t"
@@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
	 * If the task has used the math, pre-load the FPU on xsave processors
	 * or if the past 5 consecutive context-switches used math.
	 */
	fpu.preload = new_fpu->fpstate_active &&
	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
		      new_fpu->fpstate_active &&
		      (use_eager_fpu() || new_fpu->counter > 5);

	if (old_fpu->fpregs_active) {

@@ -1,23 +1,44 @@
#ifdef __ASSEMBLY__
#ifndef _ASM_X86_FRAME_H
#define _ASM_X86_FRAME_H

#include <asm/asm.h>

/* The annotation hides the frame from the unwinder and makes it look
   like an ordinary ebp save/restore. This avoids some special cases for
   the frame pointer later */
#ifdef CONFIG_FRAME_POINTER
	.macro FRAME
	__ASM_SIZE(push,) %__ASM_REG(bp)
	__ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
	.endm
	.macro ENDFRAME
	__ASM_SIZE(pop,) %__ASM_REG(bp)
	.endm
#else
	.macro FRAME
	.endm
	.macro ENDFRAME
	.endm
#endif
/*
 * These are stack frame creation macros. They should be used by every
 * callable non-leaf asm function to make kernel stack traces more reliable.
 */

#endif /* __ASSEMBLY__ */
#ifdef CONFIG_FRAME_POINTER

#ifdef __ASSEMBLY__

.macro FRAME_BEGIN
	push %_ASM_BP
	_ASM_MOV %_ASM_SP, %_ASM_BP
.endm

.macro FRAME_END
	pop %_ASM_BP
.endm

#else /* !__ASSEMBLY__ */

#define FRAME_BEGIN \
	"push %" _ASM_BP "\n" \
	_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"

#define FRAME_END "pop %" _ASM_BP "\n"

#endif /* __ASSEMBLY__ */

#define FRAME_OFFSET __ASM_SEL(4, 8)

#else /* !CONFIG_FRAME_POINTER */

#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0

#endif /* CONFIG_FRAME_POINTER */

#endif /* _ASM_X86_FRAME_H */

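Editor's note: the rewritten frame.h replaces the old FRAME/ENDFRAME pair with FRAME_BEGIN/FRAME_END and adds a C-usable variant. A hedged sketch of how a non-leaf asm function would adopt them (the helper itself is made up, not from this commit):

/* Hypothetical .S helper: set up a frame so stack traces stay reliable. */
#include <asm/frame.h>
#include <linux/linkage.h>

ENTRY(example_helper)
	FRAME_BEGIN
	/* ... body that may show up in stack traces ... */
	FRAME_END
	ret
ENDPROC(example_helper)
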
@@ -53,7 +53,7 @@
#define IMR_MASK (IMR_ALIGN - 1)

int imr_add_range(phys_addr_t base, size_t size,
		  unsigned int rmask, unsigned int wmask, bool lock);
		  unsigned int rmask, unsigned int wmask);

int imr_remove_range(phys_addr_t base, size_t size);

@@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void)
		cpu_relax();
}

static inline void
__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
{
	/*
	 * Subtle. In the case of the 'never do double writes' workaround
	 * we have to lock out interrupts to be safe. As we don't care
	 * of the value read we use an atomic rmw access to avoid costly
	 * cli/sti. Otherwise we use an even cheaper single atomic write
	 * to the APIC.
	 */
	unsigned int cfg;

	/*
	 * Wait for idle.
	 */
	__xapic_wait_icr_idle();

	/*
	 * No need to touch the target chip field
	 */
	cfg = __prepare_ICR(shortcut, vector, dest);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	native_apic_mem_write(APIC_ICR, cfg);
}
void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);

/*
 * This is used to send an IPI with no shorthand notation (the destination is
 * specified in bits 56 to 63 of the ICR).
 */
static inline void
__default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
{
	unsigned long cfg;

	/*
	 * Wait for idle.
	 */
	if (unlikely(vector == NMI_VECTOR))
		safe_apic_wait_icr_idle();
	else
		__xapic_wait_icr_idle();

	/*
	 * prepare target chip field
	 */
	cfg = __prepare_ICR2(mask);
	native_apic_mem_write(APIC_ICR2, cfg);

	/*
	 * program the ICR
	 */
	cfg = __prepare_ICR(0, vector, dest);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	native_apic_mem_write(APIC_ICR, cfg);
}
void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);

extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);

@@ -1,7 +1,7 @@
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H

#include <asm/processor.h>
#include <asm/cpufeature.h>

static inline bool arch_irq_work_has_interrupt(void)
{

@@ -135,6 +135,7 @@ struct mca_config {
	bool ignore_ce;
	bool disabled;
	bool ser;
	bool recovery;
	bool bios_cmci_threshold;
	u8 banks;
	s8 bootlog;

@@ -3,6 +3,7 @@

#include <asm/cpu.h>
#include <linux/earlycpio.h>
#include <linux/initrd.h>

#define native_rdmsr(msr, val1, val2) \
do { \
@@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif

static inline unsigned long get_initrd_start(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	return initrd_start;
#else
	return 0;
#endif
}

static inline unsigned long get_initrd_start_addr(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
#ifdef CONFIG_X86_32
	unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);

	return (unsigned long)__pa_nodebug(*initrd_start_p);
#else
	return get_initrd_start();
#endif
#else /* CONFIG_BLK_DEV_INITRD */
	return 0;
#endif
}

#endif /* _ASM_X86_MICROCODE_H */

@@ -40,7 +40,6 @@ struct extended_sigtable {
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
#define DWSIZE (sizeof(u32))

#define get_totalsize(mc) \
	(((struct microcode_intel *)mc)->hdr.datasize ? \

@@ -19,7 +19,8 @@ typedef struct {
#endif

	struct mutex lock;
	void __user *vdso;
	void __user *vdso;			/* vdso base address */
	const struct vdso_image *vdso_image;	/* vdso image in use */

	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
} mm_context_t;

@@ -1,7 +1,12 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H

/* CPU model specific register (MSR) numbers */
/*
 * CPU model specific register (MSR) numbers.
 *
 * Do not add new entries to this file unless the definitions are shared
 * between multiple compilation units.
 */

/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
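Editor's note: the comment added above narrows msr-index.h to MSR numbers shared across compilation units. For context, a minimal illustrative reader for one such shared entry; rdmsrl() is the standard accessor, the function itself is made up:

#include <asm/msr.h>
#include <linux/printk.h>

static void example_dump_efer(void)
{
	u64 efer;

	rdmsrl(MSR_EFER, efer);
	pr_info("EFER: 0x%016llx\n", efer);
}
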
@@ -3,6 +3,8 @@

#include <linux/sched.h>

#include <asm/cpufeature.h>

#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4

@@ -13,7 +13,7 @@ struct vm86;
#include <asm/types.h>
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
@@ -24,7 +24,6 @@ struct vm86;
#include <asm/fpu/types.h>

#include <linux/personality.h>
#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/math64.h>
@@ -300,10 +299,13 @@ struct tss_struct {
	 */
	unsigned long io_bitmap[IO_BITMAP_LONGS + 1];

#ifdef CONFIG_X86_32
	/*
	 * Space for the temporary SYSENTER stack:
	 * Space for the temporary SYSENTER stack.
	 */
	unsigned long SYSENTER_stack_canary;
	unsigned long SYSENTER_stack[64];
#endif

} ____cacheline_aligned;

@@ -7,12 +7,23 @@

void syscall_init(void);

#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
void entry_SYSCALL_compat(void);
#endif

#ifdef CONFIG_X86_32
void entry_INT80_32(void);
void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
void __begin_SYSENTER_singlestep_region(void);
void __end_SYSENTER_singlestep_region(void);
#endif

#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
#endif

void x86_configure_nx(void);
void x86_report_nx(void);

@@ -13,7 +13,6 @@
	X86_EFLAGS_CF | X86_EFLAGS_RF)

void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
		     struct pt_regs *regs, unsigned long mask);

@@ -15,7 +15,7 @@

#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>

/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca

@ -16,7 +16,6 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
#include <asm/cpufeature.h>

extern int smp_num_siblings;
extern unsigned int num_processors;

@@ -49,7 +49,7 @@
 */
#ifndef __ASSEMBLY__
struct task_struct;
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <linux/atomic.h>

struct thread_info {
@@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)

/* work to do in syscall_trace_enter() */
/*
 * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
 * enter_from_user_mode()
 */
#define _TIF_WORK_SYSCALL_ENTRY \
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
	 _TIF_NOHZ)

/* work to do on any return to user space */

@@ -5,8 +5,57 @@
#include <linux/sched.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>

static inline void __invpcid(unsigned long pcid, unsigned long addr,
			     unsigned long type)
{
	struct { u64 d[2]; } desc = { { pcid, addr } };

	/*
	 * The memory clobber is because the whole point is to invalidate
	 * stale TLB entries and, especially if we're flushing global
	 * mappings, we don't want the compiler to reorder any subsequent
	 * memory accesses before the TLB flush.
	 *
	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
	 * invpcid (%rcx), %rax in long mode.
	 */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
}

#define INVPCID_TYPE_INDIV_ADDR 0
#define INVPCID_TYPE_SINGLE_CTXT 1
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
#define INVPCID_TYPE_ALL_NON_GLOBAL 3

/* Flush all mappings for a given pcid and addr, not including globals. */
static inline void invpcid_flush_one(unsigned long pcid,
				     unsigned long addr)
{
	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
}

/* Flush all mappings for a given PCID, not including globals. */
static inline void invpcid_flush_single_context(unsigned long pcid)
{
	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
}

/* Flush all mappings, including globals, for all PCIDs. */
static inline void invpcid_flush_all(void)
{
	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
}

/* Flush all mappings for all PCIDs except globals. */
static inline void invpcid_flush_all_nonglobals(void)
{
	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
}

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{
	unsigned long flags;

	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		/*
		 * Using INVPCID is considerably faster than a pair of writes
		 * to CR4 sandwiched inside an IRQ flag save/restore.
		 */
		invpcid_flush_all();
		return;
	}

	/*
	 * Read-modify-write to CR4 - protect it from preemption and
	 * from interrupts. (Use the raw variant because this code can
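Editor's note: the new INVPCID helpers map one-to-one onto the four hardware invalidation types. A hedged dispatch sketch (the policy and the wrapper are illustrative; only the helpers and the feature test come from the hunks above):

/* Hypothetical wrapper, not part of this commit. */
static void example_flush(unsigned long pcid, unsigned long addr, bool all)
{
	if (!static_cpu_has(X86_FEATURE_INVPCID))
		return;		/* caller falls back to CR3/CR4 writes */

	if (all)
		invpcid_flush_all();		/* type 2: everything, incl. globals */
	else
		invpcid_flush_one(pcid, addr);	/* type 0: one address in one PCID */
}
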
@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void)
|
||||
return rdtsc();
|
||||
}
|
||||
|
||||
extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
|
||||
|
||||
extern void tsc_init(void);
|
||||
extern void mark_tsc_unstable(char *reason);
|
||||
extern int unsynchronized_tsc(void);
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
/*
|
||||
|
@ -13,9 +13,6 @@ struct vdso_image {
|
||||
void *data;
|
||||
unsigned long size; /* Always a multiple of PAGE_SIZE */
|
||||
|
||||
/* text_mapping.pages is big enough for data/size page pointers */
|
||||
struct vm_special_mapping text_mapping;
|
||||
|
||||
unsigned long alt, alt_len;
|
||||
|
||||
long sym_vvar_start; /* Negative offset to the vvar area */
|
||||
|
@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
|
||||
};
|
||||
extern struct vsyscall_gtod_data vsyscall_gtod_data;
|
||||
|
||||
extern int vclocks_used;
|
||||
static inline bool vclock_was_used(int vclock)
|
||||
{
|
||||
return READ_ONCE(vclocks_used) & (1 << vclock);
|
||||
}
|
||||
|
||||
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
|
||||
{
|
||||
unsigned ret;
|
||||
|
@ -256,7 +256,7 @@ struct sigcontext_64 {
|
||||
__u16 cs;
|
||||
__u16 gs;
|
||||
__u16 fs;
|
||||
__u16 __pad0;
|
||||
__u16 ss;
|
||||
__u64 err;
|
||||
__u64 trapno;
|
||||
__u64 oldmask;
|
||||
@ -341,9 +341,37 @@ struct sigcontext {
|
||||
__u64 rip;
|
||||
__u64 eflags; /* RFLAGS */
|
||||
__u16 cs;
|
||||
|
||||
/*
|
||||
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
|
||||
* Linux saved and restored fs and gs in these slots. This
|
||||
* was counterproductive, as fsbase and gsbase were never
|
||||
* saved, so arch_prctl was presumably unreliable.
|
||||
*
|
||||
* These slots should never be reused without extreme caution:
|
||||
*
|
||||
* - Some DOSEMU versions stash fs and gs in these slots manually,
|
||||
* thus overwriting anything the kernel expects to be preserved
|
||||
* in these slots.
|
||||
*
|
||||
* - If these slots are ever needed for any other purpose,
|
||||
* there is some risk that very old 64-bit binaries could get
|
||||
* confused. I doubt that many such binaries still work,
|
||||
* though, since the same patch in 2.5.64 also removed the
|
||||
* 64-bit set_thread_area syscall, so it appears that there
|
||||
* is no TLS API beyond modify_ldt that works in both pre-
|
||||
* and post-2.5.64 kernels.
|
||||
*
|
||||
* If the kernel ever adds explicit fs, gs, fsbase, and gsbase
|
||||
* save/restore, it will most likely need to be opt-in and use
|
||||
* different context slots.
|
||||
*/
|
||||
__u16 gs;
|
||||
__u16 fs;
|
||||
__u16 __pad0;
|
||||
union {
|
||||
__u16 ss; /* If UC_SIGCONTEXT_SS */
|
||||
__u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
|
||||
};
|
||||
__u64 err;
|
||||
__u64 trapno;
|
||||
__u64 oldmask;
|
||||
|
@ -1,11 +1,54 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H

-#define UC_FP_XSTATE	0x1	/* indicates the presence of extended state
-				 * information in the memory layout pointed
-				 * by the fpstate pointer in the ucontext's
-				 * sigcontext struct (uc_mcontext).
-				 */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE	0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ *   with zero or garbage in the SS slot (e.g. old CRIU) and call
+ *   sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ *   context, delete the old SS segment (with modify_ldt), and change
+ *   the saved CS to a 64-bit segment. These DOSEMU versions expect
+ *   sigreturn to send them back to 64-bit mode without killing them,
+ *   despite the fact that the SS selector when the signal was raised is
+ *   no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
+ *   will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ *   modifying the saved context will end up in exactly the state they
+ *   started in, even if they were running in a segmented context when
+ *   the signal was raised. Old kernels would lose track of the
+ *   previous SS value.
+ */
+#define UC_SIGCONTEXT_SS	0x2
+#define UC_STRICT_RESTORE_SS	0x4
+#endif

 #include <asm-generic/ucontext.h>
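
From user space, these flags appear in the uc_flags member of the ucontext_t passed to an SA_SIGINFO handler. A minimal sketch of probing for the new SS semantics follows; the fallback flag values mirror the definitions above, and whether the running kernel actually sets them is exactly what the probe reports (printf in a signal handler is not async-signal-safe, which is fine for a demo):

	#include <signal.h>
	#include <stdio.h>
	#include <string.h>
	#include <ucontext.h>

	#ifndef UC_SIGCONTEXT_SS
	#define UC_SIGCONTEXT_SS	0x2
	#define UC_STRICT_RESTORE_SS	0x4
	#endif

	static void handler(int sig, siginfo_t *info, void *ctx)
	{
		const ucontext_t *uc = ctx;

		/* A kernel that saves SS in the sigcontext advertises it here. */
		if (uc->uc_flags & UC_SIGCONTEXT_SS)
			printf("kernel saves SS (strict restore: %d)\n",
			       !!(uc->uc_flags & UC_STRICT_RESTORE_SS));
		else
			printf("old sigcontext semantics, SS not saved\n");
	}

	int main(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = handler;
		sa.sa_flags = SA_SIGINFO;
		sigaction(SIGUSR1, &sa, NULL);
		raise(SIGUSR1);
		return 0;
	}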
@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
	apic_write(APIC_LDR, val);
 }

-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
	unsigned long flags;
@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
	unsigned long value;
	unsigned int id = (x >> 24) & 0xff;

-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
		rdmsrl(MSR_FAM10H_NODE_ID, value);
		id |= (value << 2) & 0xff00;
	}
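
The APIC ID here is widened with node bits from the NODE_ID MSR: the low byte comes from bits 24-31 of the APIC register, and (value << 2) & 0xff00 splices the node number into the byte above it. A worked example with made-up register values:

	#include <stdio.h>

	int main(void)
	{
		unsigned long x = 0x05000000;	/* made-up APIC register value */
		unsigned long value = 0x40;	/* made-up MSR_FAM10H_NODE_ID value */
		unsigned int id = (x >> 24) & 0xff;	/* low byte: 0x05 */

		id |= (value << 2) & 0xff00;	/* node bits land in 0xff00: 0x100 */
		printf("apic id = 0x%x\n", id);	/* prints 0x105 */
		return 0;
	}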
@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
	this_cpu_write(cpu_llc_id, node);

	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
		rdmsrl(MSR_FAM10H_NODE_ID, val);
		nodes = ((val >> 3) & 7) + 1;
	}
@ -18,6 +18,66 @@
 #include <asm/proto.h>
 #include <asm/ipi.h>

+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+	/*
+	 * Subtle. In the case of the 'never do double writes' workaround
+	 * we have to lock out interrupts to be safe. As we don't care
+	 * about the value read, we use an atomic rmw access to avoid
+	 * costly cli/sti. Otherwise we use an even cheaper single atomic
+	 * write to the APIC.
+	 */
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	__xapic_wait_icr_idle();
+
+	/*
+	 * No need to touch the target chip field.
+	 */
+	cfg = __prepare_ICR(shortcut, vector, dest);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+	unsigned long cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	if (unlikely(vector == NMI_VECTOR))
+		safe_apic_wait_icr_idle();
+	else
+		__xapic_wait_icr_idle();
+
+	/*
+	 * Prepare the target chip field.
+	 */
+	cfg = __prepare_ICR2(mask);
+	native_apic_mem_write(APIC_ICR2, cfg);
+
+	/*
+	 * Program the ICR.
+	 */
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	native_apic_mem_write(APIC_ICR, cfg);
+}
+
 void default_send_IPI_single_phys(int cpu, int vector)
 {
	unsigned long flags;
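
__default_send_IPI_dest_field() puts the destination APIC ID in bits 56-63 of the ICR, which are bits 24-31 of the 32-bit ICR2 register. A stand-alone sketch of that packing follows; the macro mirrors the kernel's SET_APIC_DEST_FIELD on the assumption that it is a plain shift by 24:

	#include <stdint.h>
	#include <stdio.h>

	/* Bits 56-63 of the full ICR are the top byte of the ICR2 dword. */
	#define SET_APIC_DEST_FIELD(x)	((uint32_t)(x) << 24)

	int main(void)
	{
		unsigned int apicid = 3;	/* example destination APIC ID */
		uint32_t icr2 = SET_APIC_DEST_FIELD(apicid);

		printf("ICR2 = 0x%08x\n", icr2);	/* prints 0x03000000 */
		return 0;
	}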
@ -59,7 +59,6 @@ void common(void) {

 #ifdef CONFIG_PARAVIRT
	BLANK();
-	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
	OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
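
Each OFFSET() line in asm-offsets turns a C struct member offset into an assembler-visible constant, and at its core it is just offsetof(). A user-space sketch of the same idea (the struct here is an illustrative stand-in, not the kernel's pv_irq_ops):

	#include <stddef.h>
	#include <stdio.h>

	struct pv_irq_ops {		/* illustrative stand-in */
		void (*save_fl)(void);
		void (*irq_disable)(void);
	};

	int main(void)
	{
		/* The generated header would carry this value as an asm constant. */
		printf("PV_IRQ_irq_disable = %zu\n",
		       offsetof(struct pv_irq_ops, irq_disable));
		return 0;
	}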
Some files were not shown because too many files have changed in this diff.