Merge branch 'linus' into x86/urgent, to pick up dependencies for a fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2016-03-16 09:01:55 +01:00
commit ba4e06d68e
261 changed files with 7775 additions and 2828 deletions

View File

@ -0,0 +1,26 @@
Alpine MSIX controller
See arm,gic-v3.txt for SPI and MSI definitions.
Required properties:
- compatible: should be "al,alpine-msix"
- reg: physical base address and size of the registers
- interrupt-parent: specifies the parent interrupt controller.
- interrupt-controller: identifies the node as an interrupt controller
- msi-controller: identifies the node as an PCI Message Signaled Interrupt
controller
- al,msi-base-spi: SPI base of the MSI frame
- al,msi-num-spis: number of SPIs assigned to the MSI frame, relative to SPI0
Example:
msix: msix {
compatible = "al,alpine-msix";
reg = <0x0 0xfbe00000 0x0 0x100000>;
interrupt-parent = <&gic>;
interrupt-controller;
msi-controller;
al,msi-base-spi = <160>;
al,msi-num-spis = <160>;
};

View File

@ -16,6 +16,7 @@ Main node required properties:
"arm,cortex-a15-gic"
"arm,cortex-a7-gic"
"arm,cortex-a9-gic"
"arm,eb11mp-gic"
"arm,gic-400"
"arm,pl390"
"arm,tc11mp-gic"

View File

@ -0,0 +1,44 @@
* Marvell ODMI for MSI support
Some Marvell SoCs have an On-Die Message Interrupt (ODMI) controller
which can be used by on-board peripheral for MSI interrupts.
Required properties:
- compatible : The value here should contain:
"marvell,ap806-odmi-controller", "marvell,odmi-controller".
- interrupt,controller : Identifies the node as an interrupt controller.
- msi-controller : Identifies the node as an MSI controller.
- marvell,odmi-frames : Number of ODMI frames available. Each frame
provides a number of events.
- reg : List of register definitions, one for each
ODMI frame.
- marvell,spi-base : List of GIC base SPI interrupts, one for each
ODMI frame. Those SPI interrupts are 0-based,
i.e marvell,spi-base = <128> will use SPI #96.
See Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
for details about the GIC Device Tree binding.
- interrupt-parent : Reference to the parent interrupt controller.
Example:
odmi: odmi@300000 {
compatible = "marvell,ap806-odm-controller",
"marvell,odmi-controller";
interrupt-controller;
msi-controller;
marvell,odmi-frames = <4>;
reg = <0x300000 0x4000>,
<0x304000 0x4000>,
<0x308000 0x4000>,
<0x30C000 0x4000>;
marvell,spi-base = <128>, <136>, <144>, <152>;
};

View File

@ -23,6 +23,12 @@ Optional properties:
- mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors
to which the GIC may not route interrupts. Valid values are 2 - 7.
This property is ignored if the CPU is started in EIC mode.
- mti,reserved-ipi-vectors : Specifies the range of GIC interrupts that are
reserved for IPIs.
It accepts 2 values, the 1st is the starting interrupt and the 2nd is the size
of the reserved range.
If not specified, the driver will allocate the last 2 * number of VPEs in the
system.
Required properties for timer sub-node:
- compatible : Should be "mti,gic-timer".
@ -44,6 +50,7 @@ Example:
#interrupt-cells = <3>;
mti,reserved-cpu-vectors = <7>;
mti,reserved-ipi-vectors = <40 8>;
timer {
compatible = "mti,gic-timer";

View File

@ -0,0 +1,49 @@
Sigma Designs SMP86xx/SMP87xx secondary interrupt controller
Required properties:
- compatible: should be "sigma,smp8642-intc"
- reg: physical address of MMIO region
- ranges: address space mapping of child nodes
- interrupt-parent: phandle of parent interrupt controller
- interrupt-controller: boolean
- #address-cells: should be <1>
- #size-cells: should be <1>
One child node per control block with properties:
- reg: address of registers for this control block
- interrupt-controller: boolean
- #interrupt-cells: should be <2>, interrupt index and flags per interrupts.txt
- interrupts: interrupt spec of primary interrupt controller
Example:
interrupt-controller@6e000 {
compatible = "sigma,smp8642-intc";
reg = <0x6e000 0x400>;
ranges = <0x0 0x6e000 0x400>;
interrupt-parent = <&gic>;
interrupt-controller;
#address-cells = <1>;
#size-cells = <1>;
irq0: interrupt-controller@0 {
reg = <0x000 0x100>;
interrupt-controller;
#interrupt-cells = <2>;
interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
};
irq1: interrupt-controller@100 {
reg = <0x100 0x100>;
interrupt-controller;
#interrupt-cells = <2>;
interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
};
irq2: interrupt-controller@300 {
reg = <0x300 0x100>;
interrupt-controller;
#interrupt-cells = <2>;
interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
};
};

View File

@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeature.h for the valid bit
arch/x86/include/asm/cpufeatures.h for the valid bit
numbers. Note the Linux specific bits are not necessarily
stable over kernel options, but the vendor specific
ones should be.
@ -1687,6 +1687,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ip= [IP_PNP]
See Documentation/filesystems/nfs/nfsroot.txt.
irqaffinity= [SMP] Set the default irq affinity mask
Format:
<cpu number>,...,<cpu number>
or
<cpu number>-<cpu number>
(must be a positive range in ascending order)
or a mixture
<cpu number>,...,<cpu number>-<cpu number>
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
@ -2566,6 +2575,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nointroute [IA-64]
noinvpcid [X86] Disable the INVPCID cpu feature.
nojitter [IA-64] Disables jitter checking for ITC timers.
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver

View File

@ -277,13 +277,15 @@ int main(int argc, char *argv[])
" %d external time stamp channels\n"
" %d programmable periodic signals\n"
" %d pulse per second\n"
" %d programmable pins\n",
" %d programmable pins\n"
" %d cross timestamping\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
caps.n_per_out,
caps.pps,
caps.n_pins);
caps.n_pins,
caps.cross_timestamping);
}
}

View File

@ -40,3 +40,28 @@ cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin (or AuthenticAMD.bin)
find . | cpio -o -H newc >../ucode.cpio
cd ..
cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
Builtin microcode
=================
We can also load builtin microcode supplied through the regular firmware
builtin method CONFIG_FIRMWARE_IN_KERNEL. Here's an example:
CONFIG_FIRMWARE_IN_KERNEL=y
CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
This basically means, you have the following tree structure locally:
/lib/firmware/
|-- amd-ucode
...
| |-- microcode_amd_fam15h.bin
...
|-- intel-ucode
...
| |-- 06-3a-09
...
so that the build system can find those files and integrate them into
the final kernel image. The early loader finds them and applies them.

View File

@ -60,6 +60,8 @@ Machine check
threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors.
mce=recovery
Force-enable recoverable machine check code paths
nomce (for compatibility with i386): same as mce=off

View File

@ -2422,6 +2422,7 @@ F: arch/mips/bmips/*
F: arch/mips/include/asm/mach-bmips/*
F: arch/mips/kernel/*bmips*
F: arch/mips/boot/dts/brcm/bcm*.dts*
F: drivers/irqchip/irq-bcm63*
F: drivers/irqchip/irq-bcm7*
F: drivers/irqchip/irq-brcmstb*
F: include/linux/bcm963xx_nvram.h

View File

@ -168,7 +168,7 @@ smp_callin(void)
cpuid, current, current->active_mm));
preempt_disable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */

View File

@ -142,7 +142,7 @@ void start_kernel_secondary(void)
local_irq_enable();
preempt_disable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/*

View File

@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)

View File

@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU
depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
select GENERIC_IRQ_CHIP
select PINCTRL
select PLAT_ORION
select SOC_BUS
@ -29,6 +28,7 @@ config MACH_ARMADA_370
bool "Marvell Armada 370 boards"
depends on ARCH_MULTI_V7
select ARMADA_370_CLK
select ARMADA_370_XP_IRQ
select CPU_PJ4B
select MACH_MVEBU_V7
select PINCTRL_ARMADA_370
@ -39,6 +39,7 @@ config MACH_ARMADA_370
config MACH_ARMADA_375
bool "Marvell Armada 375 boards"
depends on ARCH_MULTI_V7
select ARMADA_370_XP_IRQ
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
@ -58,6 +59,7 @@ config MACH_ARMADA_38X
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
select ARMADA_370_XP_IRQ
select ARMADA_38X_CLK
select HAVE_ARM_SCU
select HAVE_ARM_TWD if SMP
@ -72,6 +74,7 @@ config MACH_ARMADA_39X
bool "Marvell Armada 39x boards"
depends on ARCH_MULTI_V7
select ARM_GIC
select ARMADA_370_XP_IRQ
select ARMADA_39X_CLK
select CACHE_L2X0
select HAVE_ARM_SCU
@ -86,6 +89,7 @@ config MACH_ARMADA_39X
config MACH_ARMADA_XP
bool "Marvell Armada XP boards"
depends on ARCH_MULTI_V7
select ARMADA_370_XP_IRQ
select ARMADA_XP_CLK
select CPU_PJ4B
select MACH_MVEBU_V7

View File

@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#ifdef CONFIG_HOTPLUG_CPU

View File

@ -333,7 +333,7 @@ void secondary_start_kernel(void)
/* We are done with local CPU inits, unblock the boot CPU. */
set_cpu_online(cpu, true);
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_prepare_boot_cpu(void)

View File

@ -180,7 +180,7 @@ void start_secondary(void)
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

View File

@ -454,7 +454,7 @@ start_secondary (void *unused)
preempt_disable();
smp_callin();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

View File

@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
*/
local_flush_tlb_all();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

View File

@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)

View File

@ -151,6 +151,7 @@ config BMIPS_GENERIC
select CSRC_R4K
select SYNC_R4K
select COMMON_CLK
select BCM6345_L1_IRQ
select BCM7038_L1_IRQ
select BCM7120_L2_IRQ
select BRCMSTB_L2_IRQ
@ -2169,7 +2170,6 @@ config MIPS_MT_SMP
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select SYNC_R4K
select MIPS_GIC_IPI if MIPS_GIC
select MIPS_MT
select SMP
select SMP_UP
@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT
config MIPS_CMP
bool "MIPS CMP framework support (DEPRECATED)"
depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
select MIPS_GIC_IPI if MIPS_GIC
select SMP
select SYNC_R4K
select SYS_SUPPORTS_SMP
@ -2287,7 +2286,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPC
select MIPS_CPS_PM if HOTPLUG_CPU
select MIPS_GIC_IPI if MIPS_GIC
select SMP
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
@ -2305,10 +2303,6 @@ config MIPS_CPS_PM
select MIPS_CPC
bool
config MIPS_GIC_IPI
depends on MIPS_GIC
bool
config MIPS_CM
bool

View File

@ -26,90 +26,6 @@
#include "common.h"
#include "machtypes.h"
static void __init ath79_misc_intc_domain_init(
struct device_node *node, int irq);
static void ath79_misc_irq_handler(struct irq_desc *desc)
{
struct irq_domain *domain = irq_desc_get_handler_data(desc);
void __iomem *base = domain->host_data;
u32 pending;
pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
if (!pending) {
spurious_interrupt();
return;
}
while (pending) {
int bit = __ffs(pending);
generic_handle_irq(irq_linear_revmap(domain, bit));
pending &= ~BIT(bit);
}
}
static void ar71xx_misc_irq_unmask(struct irq_data *d)
{
void __iomem *base = irq_data_get_irq_chip_data(d);
unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
/* flush write */
__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
}
static void ar71xx_misc_irq_mask(struct irq_data *d)
{
void __iomem *base = irq_data_get_irq_chip_data(d);
unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
/* flush write */
__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
}
static void ar724x_misc_irq_ack(struct irq_data *d)
{
void __iomem *base = irq_data_get_irq_chip_data(d);
unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
/* flush write */
__raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
}
static struct irq_chip ath79_misc_irq_chip = {
.name = "MISC",
.irq_unmask = ar71xx_misc_irq_unmask,
.irq_mask = ar71xx_misc_irq_mask,
};
static void __init ath79_misc_irq_init(void)
{
if (soc_is_ar71xx() || soc_is_ar913x())
ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
else if (soc_is_ar724x() ||
soc_is_ar933x() ||
soc_is_ar934x() ||
soc_is_qca955x())
ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
else
BUG();
ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
}
static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
{
@ -212,142 +128,12 @@ static void qca955x_irq_init(void)
irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
}
/*
* The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
* these devices typically allocate coherent DMA memory, however the
* DMA controller may still have some unsynchronized data in the FIFO.
* Issue a flush in the handlers to ensure that the driver sees
* the update.
*
* This array map the interrupt lines to the DDR write buffer channels.
*/
static unsigned irq_wb_chan[8] = {
-1, -1, -1, -1, -1, -1, -1, -1,
};
asmlinkage void plat_irq_dispatch(void)
{
unsigned long pending;
int irq;
pending = read_c0_status() & read_c0_cause() & ST0_IM;
if (!pending) {
spurious_interrupt();
return;
}
pending >>= CAUSEB_IP;
while (pending) {
irq = fls(pending) - 1;
if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
ath79_ddr_wb_flush(irq_wb_chan[irq]);
do_IRQ(MIPS_CPU_IRQ_BASE + irq);
pending &= ~BIT(irq);
}
}
static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
irq_set_chip_data(irq, d->host_data);
return 0;
}
static const struct irq_domain_ops misc_irq_domain_ops = {
.xlate = irq_domain_xlate_onecell,
.map = misc_map,
};
static void __init ath79_misc_intc_domain_init(
struct device_node *node, int irq)
{
void __iomem *base = ath79_reset_base;
struct irq_domain *domain;
domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
if (!domain)
panic("Failed to add MISC irqdomain");
/* Disable and clear all interrupts */
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
}
static int __init ath79_misc_intc_of_init(
struct device_node *node, struct device_node *parent)
{
int irq;
irq = irq_of_parse_and_map(node, 0);
if (!irq)
panic("Failed to get MISC IRQ");
ath79_misc_intc_domain_init(node, irq);
return 0;
}
static int __init ar7100_misc_intc_of_init(
struct device_node *node, struct device_node *parent)
{
ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
return ath79_misc_intc_of_init(node, parent);
}
IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
ar7100_misc_intc_of_init);
static int __init ar7240_misc_intc_of_init(
struct device_node *node, struct device_node *parent)
{
ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
return ath79_misc_intc_of_init(node, parent);
}
IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
ar7240_misc_intc_of_init);
static int __init ar79_cpu_intc_of_init(
struct device_node *node, struct device_node *parent)
{
int err, i, count;
/* Fill the irq_wb_chan table */
count = of_count_phandle_with_args(
node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
for (i = 0; i < count; i++) {
struct of_phandle_args args;
u32 irq = i;
of_property_read_u32_index(
node, "qca,ddr-wb-channel-interrupts", i, &irq);
if (irq >= ARRAY_SIZE(irq_wb_chan))
continue;
err = of_parse_phandle_with_args(
node, "qca,ddr-wb-channels",
"#qca,ddr-wb-channel-cells",
i, &args);
if (err)
return err;
irq_wb_chan[irq] = args.args[0];
pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
irq, args.args[0]);
}
return mips_cpu_irq_of_init(node, parent);
}
IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
ar79_cpu_intc_of_init);
void __init arch_init_irq(void)
{
unsigned irq_wb_chan2 = -1;
unsigned irq_wb_chan3 = -1;
bool misc_is_ar71xx;
if (mips_machtype == ATH79_MACH_GENERIC_OF) {
irqchip_init();
return;
@ -355,14 +141,26 @@ void __init arch_init_irq(void)
if (soc_is_ar71xx() || soc_is_ar724x() ||
soc_is_ar913x() || soc_is_ar933x()) {
irq_wb_chan[2] = 3;
irq_wb_chan[3] = 2;
irq_wb_chan2 = 3;
irq_wb_chan3 = 2;
} else if (soc_is_ar934x()) {
irq_wb_chan[3] = 2;
irq_wb_chan3 = 2;
}
mips_cpu_irq_init();
ath79_misc_irq_init();
ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);
if (soc_is_ar71xx() || soc_is_ar913x())
misc_is_ar71xx = true;
else if (soc_is_ar724x() ||
soc_is_ar933x() ||
soc_is_ar934x() ||
soc_is_qca955x())
misc_is_ar71xx = false;
else
BUG();
ath79_misc_irq_init(
ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);
if (soc_is_ar934x())
ar934x_ip2_irq_init();

View File

@ -15,6 +15,12 @@
#include <asm/irq_cpu.h>
#include <asm/time.h>
static const struct of_device_id smp_intc_dt_match[] = {
{ .compatible = "brcm,bcm7038-l1-intc" },
{ .compatible = "brcm,bcm6345-l1-intc" },
{}
};
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
@ -24,8 +30,8 @@ void __init arch_init_irq(void)
{
struct device_node *dn;
/* Only the STB (bcm7038) controller supports SMP IRQ affinity */
dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
/* Only these controllers support SMP IRQ affinity */
dn = of_find_matching_node(NULL, smp_intc_dt_match);
if (dn)
of_node_put(dn);
else

View File

@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg)
void ath79_device_reset_set(u32 mask);
void ath79_device_reset_clear(u32 mask);
void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
void ath79_misc_irq_init(void __iomem *regs, int irq,
int irq_base, bool is_ar71xx);
#endif /* __ASM_MACH_ATH79_H */

View File

@ -44,8 +44,9 @@ static inline void plat_smp_setup(void)
mp_ops->smp_setup();
}
extern void gic_send_ipi_single(int cpu, unsigned int action);
extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
unsigned int action);
#else /* !CONFIG_SMP */

View File

@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
obj-$(CONFIG_MIPS_CMP) += smp-cmp.o
obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o
obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o
obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o
obj-$(CONFIG_MIPS_SPRAM) += spram.o
obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o

View File

@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus)
}
struct plat_smp_ops cmp_smp_ops = {
.send_ipi_single = gic_send_ipi_single,
.send_ipi_mask = gic_send_ipi_mask,
.send_ipi_single = mips_smp_send_ipi_single,
.send_ipi_mask = mips_smp_send_ipi_mask,
.init_secondary = cmp_init_secondary,
.smp_finish = cmp_smp_finish,
.boot_secondary = cmp_boot_secondary,

View File

@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = {
.boot_secondary = cps_boot_secondary,
.init_secondary = cps_init_secondary,
.smp_finish = cps_smp_finish,
.send_ipi_single = gic_send_ipi_single,
.send_ipi_mask = gic_send_ipi_mask,
.send_ipi_single = mips_smp_send_ipi_single,
.send_ipi_mask = mips_smp_send_ipi_mask,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,

View File

@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
#ifdef CONFIG_MIPS_GIC
if (gic_present) {
gic_send_ipi_single(cpu, action);
mips_smp_send_ipi_single(cpu, action);
return;
}
#endif

View File

@ -33,12 +33,16 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/ftrace.h>
#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
#include <asm/idle.h>
#include <asm/r4k-timer.h>
#include <asm/mips-cpc.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
#include <asm/setup.h>
@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
#ifdef CONFIG_GENERIC_IRQ_IPI
static struct irq_desc *call_desc;
static struct irq_desc *sched_desc;
#endif
static inline void set_cpu_sibling_map(int cpu)
{
int i;
@ -146,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops)
mp_ops = ops;
}
#ifdef CONFIG_GENERIC_IRQ_IPI
void mips_smp_send_ipi_single(int cpu, unsigned int action)
{
mips_smp_send_ipi_mask(cpumask_of(cpu), action);
}
void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned long flags;
unsigned int core;
int cpu;
local_irq_save(flags);
switch (action) {
case SMP_CALL_FUNCTION:
__ipi_send_mask(call_desc, mask);
break;
case SMP_RESCHEDULE_YOURSELF:
__ipi_send_mask(sched_desc, mask);
break;
default:
BUG();
}
if (mips_cpc_present()) {
for_each_cpu(cpu, mask) {
core = cpu_data[cpu].core;
if (core == current_cpu_data.core)
continue;
while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
mips_cpc_lock_other(core);
write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
mips_cpc_unlock_other();
}
}
}
local_irq_restore(flags);
}
static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
{
scheduler_ipi();
return IRQ_HANDLED;
}
static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
{
generic_smp_call_function_interrupt();
return IRQ_HANDLED;
}
static struct irqaction irq_resched = {
.handler = ipi_resched_interrupt,
.flags = IRQF_PERCPU,
.name = "IPI resched"
};
static struct irqaction irq_call = {
.handler = ipi_call_interrupt,
.flags = IRQF_PERCPU,
.name = "IPI call"
};
static __init void smp_ipi_init_one(unsigned int virq,
struct irqaction *action)
{
int ret;
irq_set_handler(virq, handle_percpu_irq);
ret = setup_irq(virq, action);
BUG_ON(ret);
}
static int __init mips_smp_ipi_init(void)
{
unsigned int call_virq, sched_virq;
struct irq_domain *ipidomain;
struct device_node *node;
node = of_irq_find_parent(of_root);
ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
/*
* Some platforms have half DT setup. So if we found irq node but
* didn't find an ipidomain, try to search for one that is not in the
* DT.
*/
if (node && !ipidomain)
ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
BUG_ON(!ipidomain);
call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
BUG_ON(!call_virq);
sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
BUG_ON(!sched_virq);
if (irq_domain_is_ipi_per_cpu(ipidomain)) {
int cpu;
for_each_cpu(cpu, cpu_possible_mask) {
smp_ipi_init_one(call_virq + cpu, &irq_call);
smp_ipi_init_one(sched_virq + cpu, &irq_resched);
}
} else {
smp_ipi_init_one(call_virq, &irq_call);
smp_ipi_init_one(sched_virq, &irq_resched);
}
call_desc = irq_to_desc(call_virq);
sched_desc = irq_to_desc(sched_virq);
return 0;
}
early_initcall(mips_smp_ipi_init);
#endif
/*
* First C code run on the secondary CPUs after being started up by
* the master.
@ -192,7 +328,7 @@ asmlinkage void start_secondary(void)
WARN_ON_ONCE(!irqs_disabled());
mp_ops->smp_finish();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void stop_this_cpu(void *dummy)

View File

@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
init_clockevents();
#endif
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

View File

@ -305,7 +305,7 @@ void __init smp_callin(void)
local_irq_enable(); /* Interrupts have been off until now */
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* NOTREACHED */
panic("smp_callin() AAAAaaaaahhhh....\n");

View File

@ -727,7 +727,7 @@ void start_secondary(void *unused)
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}

View File

@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */

View File

@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
set_cpu_online(cpu, true);
per_cpu(cpu_state, cpu) = CPU_ONLINE;
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
extern struct {

View File

@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
local_irq_enable();
wmb();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* We should never reach here! */
BUG();

View File

@ -134,7 +134,7 @@ void smp_callin(void)
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void cpu_panic(void)

View File

@ -208,7 +208,7 @@ void online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();
cpu_startup_entry(CPUHP_ONLINE);
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)

View File

@ -1163,22 +1163,23 @@ config MICROCODE
bool "CPU microcode loading support"
default y
depends on CPU_SUP_AMD || CPU_SUP_INTEL
depends on BLK_DEV_INITRD
select FW_LOADER
---help---
If you say Y here, you will be able to update the microcode on
certain Intel and AMD processors. The Intel support is for the
IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
Xeon etc. The AMD support is for families 0x10 and later. You will
obviously need the actual microcode binary data itself which is not
shipped with the Linux kernel.
Intel and AMD processors. The Intel support is for the IA32 family,
e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
AMD support is for families 0x10 and later. You will obviously need
the actual microcode binary data itself which is not shipped with
the Linux kernel.
This option selects the general module only, you need to select
at least one vendor specific module as well.
The preferred method to load microcode from a detached initrd is described
in Documentation/x86/early-microcode.txt. For that you need to enable
CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
initrd for microcode blobs.
To compile this driver as a module, choose M here: the module
will be called microcode.
In addition, you can build-in the microcode into the kernel. For that you
need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
to the CONFIG_EXTRA_FIRMWARE config option.
config MICROCODE_INTEL
bool "Intel microcode loading support"

View File

@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST
If unsure say N here.
config X86_DEBUG_STATIC_CPU_HAS
bool "Debug alternatives"
depends on DEBUG_KERNEL
---help---
This option causes additional code to be generated which
fails if static_cpu_has() is used before alternatives have
run.
If unsure, say N.
config X86_DEBUG_FPU
bool "Debug the x86 FPU code"
depends on DEBUG_KERNEL

View File

@ -1,7 +1,7 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/processor-flags.h>
struct cpu_features {

View File

@ -17,7 +17,7 @@
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
#include "../include/asm/cpufeature.h"
#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"
int main(void)

View File

@ -49,7 +49,6 @@ typedef unsigned int u32;
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20

View File

@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y

View File

@ -33,7 +33,7 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>

View File

@ -30,7 +30,7 @@
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>

View File

@ -30,7 +30,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,

View File

@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
#else /* CONFIG_X86_64 */
/*
* For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
* are different from the entry_32.S versions in not changing the segment
* registers. So only suitable for in kernel use, not when transitioning
* from or to user space. The resulting stack frame is not a standard
* pt_regs frame. The main use case is calling C code from assembler
* when all the registers need to be preserved.
*/
.macro SAVE_ALL
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
.endm
.macro RESTORE_ALL
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
.endm
#endif /* CONFIG_X86_64 */
/*

View File

@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>
#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
#else
static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
#ifdef CONFIG_CONTEXT_TRACKING
/*
* If TIF_NOHZ is set, we are required to call user_exit() before
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
enter_from_user_mode();
work &= ~_TIF_NOHZ;
}
#endif
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
* If user-mode had set TF itself, then it's still clear from
* do_debug() and we need to set it again to restore the user
* state. If we entered on the slow path, TF was already set.
*/
if (work & _TIF_SINGLESTEP)
regs->flags |= X86_EFLAGS_TF;
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
lockdep_sys_exit();
cached_flags =
READ_ONCE(pt_regs_to_thread_info(regs)->flags);
cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
* returning to user mode. We need to clear it *after* signal
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
*/
ti->status &= ~TS_COMPAT;
#endif
user_enter();
}
@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
syscall_slow_exit_work(regs, cached_flags);
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
* returning to user mode.
*/
ti->status &= ~TS_COMPAT;
#endif
local_irq_disable();
prepare_exit_to_usermode(regs);
}
#ifdef CONFIG_X86_64
__visible void do_syscall_64(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long nr = regs->orig_ax;
enter_from_user_mode();
local_irq_enable();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
/*
* NB: Native and x32 syscalls are dispatched from the same
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
regs->ax = sys_call_table[nr & __SYSCALL_MASK](
regs->di, regs->si, regs->dx,
regs->r10, regs->r8, regs->r9);
}
syscall_return_slowpath(regs);
}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
* Does a 32-bit syscall. Called with IRQs on and does all entry and
* exit work and returns with IRQs off. This function is extremely hot
* in workloads that use it, and it's usually called from
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
* all entry and exit work and returns with IRQs off. This function is
* extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
#ifdef CONFIG_X86_32
/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
__visible
#else
/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
static
#endif
__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned int nr = (unsigned int)regs->orig_ax;
@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
#ifdef CONFIG_X86_64
/* Handles INT80 on 64-bit kernels */
__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
#endif
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;
/*
* Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
enter_from_user_mode();
local_irq_enable();
/* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
#ifdef CONFIG_CONTEXT_TRACKING
enter_from_user_mode();
#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}

View File

@ -40,7 +40,7 @@
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
@ -287,20 +287,93 @@ need_resched:
END(resume_kernel)
#endif
# SYSENTER call handler stub
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
* to being single-stepped if a user program sets TF and executes SYSENTER.
* There is absolutely nothing that we can do to prevent this from happening
* (thanks Intel!). To keep our handling of this situation as simple as
* possible, we handle TF just like AC and NT, except that our #DB handler
* will ignore all of the single-step traps generated in this range.
*/
#ifdef CONFIG_XEN
/*
* Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path.
*/
ENTRY(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
jmp sysenter_past_esp
#endif
/*
* 32-bit SYSENTER entry.
*
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
* if X86_FEATURE_SEP is available. This is the preferred system call
* entry on 32-bit systems.
*
* The SYSENTER instruction, in principle, should *only* occur in the
* vDSO. In practice, a small number of Android devices were shipped
* with a copy of Bionic that inlined a SYSENTER instruction. This
* never happened in any of Google's Bionic versions -- it only happened
* in a narrow range of Intel-provided versions.
*
* SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
* IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
* and does not save old EIP (!!!), ESP, or EFLAGS.
*
* To avoid losing track of EFLAGS.VM (and thus potentially corrupting
* user and/or vm86 state), we explicitly disable the SYSENTER
* instruction in vm86 mode by reprogramming the MSRs.
*
* Arguments:
* eax system call number
* ebx arg1
* ecx arg2
* edx arg3
* esi arg4
* edi arg5
* ebp user stack
* 0(%ebp) arg6
*/
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
ASM_CLAC /* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
* SYSENTER doesn't filter flags, so we need to clear NT, AC
* and TF ourselves. To save a few cycles, we can check whether
* either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
* If TF is set, we will single-step all the way to here -- do_debug
* will ignore all the traps. (Yes, this is slow, but so is
* single-stepping in general. This allows us to avoid having
* a more complicated code to handle the case where a user program
* forces us to single-step through the SYSENTER entry code.)
*
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
@ -326,6 +399,15 @@ sysenter_past_esp:
popl %ebp /* pt_regs->bp */
popl %eax /* pt_regs->ax */
/*
* Restore all flags except IF. (We restore IF separately because
* STI gives a one-instruction window in which we won't be interrupted,
* whereas POPF does not.)
*/
addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
btr $X86_EFLAGS_IF_BIT, (%esp)
popfl
/*
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
@ -339,28 +421,63 @@ sysenter_past_esp:
.popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
.Lsysenter_fix_flags:
pushl $X86_EFLAGS_FIXED
popfl
jmp .Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
# system call handler stub
/*
* 32-bit legacy system call entry.
*
* 32-bit x86 Linux system calls traditionally used the INT $0x80
* instruction. INT $0x80 lands here.
*
* This entry point can be used by any 32-bit perform system calls.
* Instances of INT $0x80 can be found inline in various programs and
* libraries. It is also used by the vDSO's __kernel_vsyscall
* fallback for hardware that doesn't support a faster entry method.
* Restarted 32-bit system calls also fall back to INT $0x80
* regardless of what instruction was originally used to do the system
* call. (64-bit programs can use INT $0x80 as well, but they can
* only run on 64-bit kernels and therefore land in
* entry_INT80_compat.)
*
* This is considered a slow path. It is not used by most libc
* implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
* ebx arg1
* ecx arg2
* edx arg3
* esi arg4
* edi arg5
* ebp arg6
*/
ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
* User mode is traced as though IRQs are on. Unlike the 64-bit
* case, INT80 is a trap gate on 32-bit kernels, so interrupts
* are already on (unless user code is messing around with iopl).
* User mode is traced as though IRQs are on, and the interrupt gate
* turned them off.
*/
TRACE_IRQS_OFF
movl %esp, %eax
call do_syscall_32_irqs_on
call do_int80_syscall_32
.Lsyscall_32_done:
restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@ -387,19 +504,6 @@ ENTRY(iret_exc )
#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
#ifdef CONFIG_PARAVIRT
/*
* The kernel can't run on a non-flat stack if paravirt mode
* is active. Rather than try to fixup the high bits of
* ESP, bypass this code entirely. This may break DOSemu
* and/or Wine support in a paravirt VM, although the option
* is still available to implement the setting of the high
* 16-bits in the INTERRUPT_RETURN paravirt-op.
*/
cmpl $0, pv_info+PARAVIRT_enabled
jne restore_nocheck
#endif
/*
* Setup and switch to ESPFIX stack
*
@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
/*
* Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path.
*/
ENTRY(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
jmp sysenter_past_esp
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@ -939,51 +1035,48 @@ error_code:
jmp ret_from_exception
END(page_fault)
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
* that sets up the real kernel stack. Check here, since we can't
* allow the wrong stack to be used.
*
* "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
* already pushed 3 words if it hits on the sysenter instruction:
* eflags, cs and eip.
*
* We just load the right stack, and push the three (known) values
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
.macro FIX_STACK offset ok label
cmpw $__KERNEL_CS, 4(%esp)
jne \ok
\label:
movl TSS_sysenter_sp0 + \offset(%esp), %esp
pushfl
pushl $__KERNEL_CS
pushl $sysenter_past_esp
.endm
ENTRY(debug)
/*
* #DB can happen at the first instruction of
* entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
* happens, then we will be running on a very small stack. We
* need to detect this condition and switch to the thread
* stack before calling any C code at all.
*
* If you edit this code, keep in mind that NMIs can happen in here.
*/
ASM_CLAC
cmpl $entry_SYSENTER_32, (%esp)
jne debug_stack_correct
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
pushl $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Ldebug_from_sysenter_stack
TRACE_IRQS_OFF
call do_debug
jmp ret_from_exception
.Ldebug_from_sysenter_stack:
/* We're on the SYSENTER stack. Switch off. */
movl %esp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
TRACE_IRQS_OFF
call do_debug
movl %ebp, %esp
jmp ret_from_exception
END(debug)
/*
* NMI is doubly nasty. It can happen _while_ we're handling
* a debug fault, and the debug fault hasn't yet been able to
* clear up the stack. So we first check whether we got an
* NMI on the sysenter entry path, but after that we need to
* check whether we got an NMI on the debug path where the debug
* fault happened on the sysenter path.
* NMI is doubly nasty. It can happen on the first instruction of
* entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
* of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
* switched stacks. We handle both conditions by simply checking whether we
* interrupted kernel code running on the SYSENTER stack.
*/
ENTRY(nmi)
ASM_CLAC
@ -994,41 +1087,32 @@ ENTRY(nmi)
popl %eax
je nmi_espfix_stack
#endif
cmpl $entry_SYSENTER_32, (%esp)
je nmi_stack_fixup
pushl %eax
movl %esp, %eax
/*
* Do not access memory above the end of our stack page,
* it might not exist.
*/
andl $(THREAD_SIZE-1), %eax
cmpl $(THREAD_SIZE-20), %eax
popl %eax
jae nmi_stack_correct
cmpl $entry_SYSENTER_32, 12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
pushl %eax
pushl %eax # pt_regs->orig_ax
SAVE_ALL
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Lnmi_from_sysenter_stack
/* Not on SYSENTER stack. */
call do_nmi
jmp restore_all_notrace
nmi_stack_fixup:
FIX_STACK 12, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_debug_stack_check:
cmpw $__KERNEL_CS, 16(%esp)
jne nmi_stack_correct
cmpl $debug, (%esp)
jb nmi_stack_correct
cmpl $debug_esp_fix_insn, (%esp)
ja nmi_stack_correct
FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct
.Lnmi_from_sysenter_stack:
/*
* We're on the SYSENTER stack. Switch off. No one (not even debug)
* is using the thread stack right now, so it's safe for us to use it.
*/
movl %esp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
call do_nmi
movl %ebp, %esp
jmp restore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:

View File

@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
* This is the only entry point used for 64-bit system calls. The
* hardware interface is reasonably well designed and the register to
* argument mapping Linux uses fits well with the registers that are
* available when SYSCALL is used.
*
* SYSCALL instructions can be found inlined in libc implementations as
* well as some other programs and libraries. There are also a handful
* of SYSCALL instructions in the vDSO used, for example, as a
* clock_gettimeofday fallback.
*
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
TRACE_IRQS_OFF
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
/*
* Re-enable interrupts.
* We use 'rsp_scratch' as a scratch space, hence irq-off block above
* must execute atomically in the face of possible interrupt-driven
* task preemption. We must enable interrupts only after we're done
* with using rsp_scratch:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz tracesys
/*
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
*/
testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz entry_SYSCALL64_slow_path
entry_SYSCALL_64_fastpath:
/*
* Easy case: enable interrupts and issue the syscall. If the syscall
* needs pt_regs, we'll call a stub that disables interrupts again
* and jumps to the slow path.
*/
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max, %rax
#else
@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
/*
* This call instruction is handled specially in stub_ptregs_64.
* It might end up jumping to the slow path. If it jumps, RAX
* and all argument registers are clobbered.
*/
call *sys_call_table(, %rax, 8)
.Lentry_SYSCALL_64_after_fastpath_call:
movq %rax, RAX(%rsp)
1:
/*
* Syscall return path ending with SYSRET (fast path).
* Has incompletely filled pt_regs.
*/
LOCKDEP_SYS_EXIT
/*
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
* If we get here, then we know that pt_regs is clean for SYSRET64.
* If we see that no exit work is required (which we are required
* to check with IRQs off), then we can go straight to SYSRET64.
*/
DISABLE_INTERRUPTS(CLBR_NONE)
/*
* We must check ti flags with interrupts (or at least preemption)
* off because we must *never* return to userspace without
* processing exit work that is enqueued if we're preempted here.
* In particular, returning to userspace with any of the one-shot
* flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
* very bad.
*/
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
jnz 1f
RESTORE_C_REGS_EXCEPT_RCX_R11
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
/*
* 64-bit SYSRET restores rip from rcx,
* rflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* Restoration of rflags re-enables interrupts.
*
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
* descriptor is not reinitialized. This means that we should
* avoid SYSRET with SS == NULL, which could happen if we schedule,
* exit the kernel, and re-enter using an interrupt vector. (All
* interrupt entries on x86_64 set SS to NULL.) We prevent that
* from happening by reloading SS in __switch_to. (Actually
* detecting the failure in 64-bit userspace is tricky but can be
* done.)
*/
USERGS_SYSRET64
GLOBAL(int_ret_from_sys_call_irqs_off)
1:
/*
* The fast path looked good when we started, but something changed
* along the way and we need to switch to the slow path. Calling
* raise(3) will trigger this, for example. IRQs are off.
*/
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
jmp int_ret_from_sys_call
/* Do syscall entry tracing */
tracesys:
movq %rsp, %rdi
movl $AUDIT_ARCH_X86_64, %esi
call syscall_trace_enter_phase1
test %rax, %rax
jnz tracesys_phase2 /* if needed, run the slow path */
RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
movq ORIG_RAX(%rsp), %rax
jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
tracesys_phase2:
SAVE_EXTRA_REGS
movq %rsp, %rdi
movl $AUDIT_ARCH_X86_64, %esi
movq %rax, %rdx
call syscall_trace_enter_phase2
/*
* Reload registers from stack in case ptrace changed them.
* We don't reload %rax because syscall_trace_entry_phase2() returned
* the value it wants us to use in the table lookup.
*/
RESTORE_C_REGS_EXCEPT_RAX
RESTORE_EXTRA_REGS
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max, %rax
#else
andl $__SYSCALL_MASK, %eax
cmpl $__NR_syscall_max, %eax
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx /* fixup for C */
call *sys_call_table(, %rax, 8)
movq %rax, RAX(%rsp)
1:
/* Use IRET because user could have changed pt_regs->foo */
/*
* Syscall return path ending with IRET.
* Has correct iret frame.
*/
GLOBAL(int_ret_from_sys_call)
SAVE_EXTRA_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
jmp return_from_SYSCALL_64
entry_SYSCALL64_slow_path:
/* IRQs are off. */
SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */
return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@ -355,83 +324,45 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
.macro FORK_LIKE func
ENTRY(stub_\func)
SAVE_EXTRA_REGS 8
jmp sys_\func
END(stub_\func)
.endm
FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork
ENTRY(stub_execve)
call sys_execve
return_from_execve:
testl %eax, %eax
jz 1f
/* exec failed, can use fast SYSRET code path in this case */
ret
1:
/* must use IRET code path (pt_regs->cs may have changed) */
addq $8, %rsp
ZERO_EXTRA_REGS
movq %rax, RAX(%rsp)
jmp int_ret_from_sys_call
END(stub_execve)
/*
* Remaining execve stubs are only 7 bytes long.
* ENTRY() often aligns to 16 bytes, which in this case has no benefits.
*/
.align 8
GLOBAL(stub_execveat)
call sys_execveat
jmp return_from_execve
END(stub_execveat)
#if defined(CONFIG_X86_X32_ABI)
.align 8
GLOBAL(stub_x32_execve)
call compat_sys_execve
jmp return_from_execve
END(stub_x32_execve)
.align 8
GLOBAL(stub_x32_execveat)
call compat_sys_execveat
jmp return_from_execve
END(stub_x32_execveat)
#endif
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
ENTRY(stub_ptregs_64)
/*
* SAVE_EXTRA_REGS result is not normally needed:
* sigreturn overwrites all pt_regs->GPREGS.
* But sigreturn can fail (!), and there is no easy way to detect that.
* To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
* we SAVE_EXTRA_REGS here.
* Syscalls marked as needing ptregs land here.
* If we are on the fast path, we need to save the extra regs,
* which we achieve by trying again on the slow path. If we are on
* the slow path, the extra regs are already saved.
*
* RAX stores a pointer to the C function implementing the syscall.
* IRQs are on.
*/
SAVE_EXTRA_REGS 8
call sys_rt_sigreturn
return_from_stub:
addq $8, %rsp
RESTORE_EXTRA_REGS
movq %rax, RAX(%rsp)
jmp int_ret_from_sys_call
END(stub_rt_sigreturn)
cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
jne 1f
#ifdef CONFIG_X86_X32_ABI
ENTRY(stub_x32_rt_sigreturn)
SAVE_EXTRA_REGS 8
call sys32_x32_rt_sigreturn
jmp return_from_stub
END(stub_x32_rt_sigreturn)
#endif
/*
* Called from fast path -- disable IRQs again, pop return address
* and jump to slow path
*/
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
popq %rax
jmp entry_SYSCALL64_slow_path
1:
/* Called from C */
jmp *%rax /* called from C */
END(stub_ptregs_64)
.macro ptregs_stub func
ENTRY(ptregs_\func)
leaq \func(%rip), %rax
jmp stub_ptregs_64
END(ptregs_\func)
.endm
/* Instantiate ptregs_stub for each ptregs-using syscall */
#define __SYSCALL_64_QUAL_(sym)
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
#include <asm/syscalls_64.h>
/*
* A newly forked process directly context switches into this address.
@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK, TI_flags(%r8)
pushq $0x0002
@ -447,28 +377,32 @@ ENTRY(ret_from_fork)
call schedule_tail /* rdi: 'prev' task parameter */
RESTORE_EXTRA_REGS
testb $3, CS(%rsp) /* from kernel_thread? */
jnz 1f
/*
* By the time we get here, we have no idea whether our pt_regs,
* ti flags, and ti status came from the 64-bit SYSCALL fast path,
* the slow path, or one of the 32-bit compat paths.
* Use IRET code path to return, since it can safely handle
* all of the above.
* We came from kernel_thread. This code path is quite twisted, and
* someone should clean it up.
*
* copy_thread_tls stashes the function pointer in RBX and the
* parameter to be passed in RBP. The called function is permitted
* to call do_execve and thereby jump to user mode.
*/
jnz int_ret_from_sys_call
/*
* We came from kernel_thread
* nb: we depend on RESTORE_EXTRA_REGS above
*/
movq %rbp, %rdi
call *%rbx
movq RBP(%rsp), %rdi
call *RBX(%rsp)
movl $0, RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
/*
* Fall through as though we're exiting a syscall. This makes a
* twisted sort of sense if we just called do_execve.
*/
1:
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
END(ret_from_fork)
/*

View File

@ -19,12 +19,21 @@
.section .entry.text, "ax"
/*
* 32-bit SYSENTER instruction entry.
* 32-bit SYSENTER entry.
*
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
* IF and VM in rflags are cleared (IOW: interrupts are off).
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
* on 64-bit kernels running on Intel CPUs.
*
* The SYSENTER instruction, in principle, should *only* occur in the
* vDSO. In practice, a small number of Android devices were shipped
* with a copy of Bionic that inlined a SYSENTER instruction. This
* never happened in any of Google's Bionic versions -- it only happened
* in a narrow range of Intel-provided versions.
*
* SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
* IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
* and does not save old rip (!!!) and rflags.
* and does not save old RIP (!!!), RSP, or RFLAGS.
*
* Arguments:
* eax system call number
@ -35,10 +44,6 @@
* edi arg5
* ebp user stack
* 0(%ebp) arg6
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
*/
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
ASM_CLAC /* Clear AC after saving FLAGS */
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
cld
/*
* Sysenter doesn't filter flags, so we need to clear NT
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq.
* either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
* If TF is set, we will single-step all the way to here -- do_debug
* will ignore all the traps. (Yes, this is slow, but so is
* single-stepping in general. This allows us to avoid having
* a more complicated code to handle the case where a user program
* forces us to single-step through the SYSENTER entry code.)
*
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)
/*
* 32-bit SYSCALL instruction entry.
* 32-bit SYSCALL entry.
*
* 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
* and does not change rsp.
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
* on 64-bit kernels running on AMD CPUs.
*
* Note: rflags saving+masking-with-MSR happens only in Long mode
* The SYSCALL instruction, in principle, should *only* occur in the
* vDSO. In practice, it appears that this really is the case.
* As evidence:
*
* - The calling convention for SYSCALL has changed several times without
* anyone noticing.
*
* - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
* user task that did SYSCALL without immediately reloading SS
* would randomly crash.
*
* - Most programmers do not directly target AMD CPUs, and the 32-bit
* SYSCALL instruction does not exist on Intel CPUs. Even on AMD
* CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
* because the SYSCALL instruction in legacy/native 32-bit mode (as
* opposed to compat mode) is sufficiently poorly designed as to be
* essentially unusable.
*
* 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
* RFLAGS to R11, then loads new SS, CS, and RIP from previously
* programmed MSRs. RFLAGS gets masked by a value from another MSR
* (so CLD and CLAC are not needed). SYSCALL does not save anything on
* the stack and does not change RSP.
*
* Note: RFLAGS saving+masking-with-MSR happens only in Long mode
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
* Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)
/*
* Emulated IA32 system calls via int 0x80.
* 32-bit legacy system call entry.
*
* 32-bit x86 Linux system calls traditionally used the INT $0x80
* instruction. INT $0x80 lands here.
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
* various programs and libraries. It is also used by the vDSO's
* __kernel_vsyscall fallback for hardware that doesn't support a faster
* entry method. Restarted 32-bit system calls also fall back to INT
* $0x80 regardless of what instruction was originally used to do the
* system call.
*
* This is considered a slow path. It is not used by most libc
* implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
* edx arg3
* esi arg4
* edi arg5
* ebp arg6 (note: not saved in the stack frame, should not be touched)
*
* Notes:
* Uses the same stack frame as the x86-64 version.
* All registers except eax must be saved (but ptrace may violate that).
* Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called
* directly.
* Assumes it is only called from user space and entered with interrupts off.
* ebp arg6
*/
ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_OFF
movq %rsp, %rdi
call do_syscall_32_irqs_off
call do_int80_syscall_32
.Lsyscall_32_done:
/* Go back to user mode. */

View File

@ -6,17 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
#ifdef CONFIG_IA32_EMULATION
#define SYM(sym, compat) compat
#else
#define SYM(sym, compat) sym
#endif
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

View File

@ -6,19 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
#ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#else
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
#endif
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

View File

@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
15 64 rt_sigreturn stub_rt_sigreturn
15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
56 common clone stub_clone
57 common fork stub_fork
58 common vfork stub_vfork
59 64 execve stub_execve
56 common clone sys_clone/ptregs
57 common fork sys_fork/ptregs
58 common vfork sys_vfork/ptregs
59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
172 common iopl sys_iopl
172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
322 64 execveat stub_execveat
322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn stub_x32_rt_sigreturn
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
520 x32 execve stub_x32_execve
520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat stub_x32_execveat
545 x32 execveat compat_sys_execveat/ptregs

View File

@ -3,13 +3,63 @@
in="$1"
out="$2"
syscall_macro() {
abi="$1"
nr="$2"
entry="$3"
# Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}"
qualifier="${entry:${#real_entry}}" # Strip the function name
qualifier="${qualifier:1}" # Strip the slash, if any
echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
}
emit() {
abi="$1"
nr="$2"
entry="$3"
compat="$4"
if [ "$abi" == "64" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2
exit 1
fi
if [ -z "$compat" ]; then
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
else
echo "#ifdef CONFIG_X86_32"
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
echo "#else"
syscall_macro "$abi" "$nr" "$compat"
echo "#endif"
fi
}
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ -n "$compat" ]; then
echo "__SYSCALL_${abi}($nr, $entry, $compat)"
elif [ -n "$entry" ]; then
echo "__SYSCALL_${abi}($nr, $entry, $entry)"
if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
# COMMON is the same as 64, except that we don't expect X32
# programs to use it. Our expectation has nothing to do with
# any generated code, so treat them the same.
emit 64 "$nr" "$entry" "$compat"
elif [ "$abi" == "X32" ]; then
# X32 is equivalent to 64 on an X32-compatible kernel.
echo "#ifdef CONFIG_X86_X32_ABI"
emit 64 "$nr" "$entry" "$compat"
echo "#endif"
elif [ "$abi" == "I386" ]; then
emit "$abi" "$nr" "$entry" "$compat"
else
echo "Unknown abi $abi" >&2
exit 1
fi
done
) > "$out"

View File

@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
fprintf(outfile, "static struct page *pages[%lu];\n\n",
mapping_size / 4096);
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
fprintf(outfile, "\t.text_mapping = {\n");
fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
fprintf(outfile, "\t\t.pages = pages,\n");
fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_offset));

View File

@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/mm_types.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>

View File

@ -3,7 +3,7 @@
*/
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*

View File

@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
void __init init_vdso_image(const struct vdso_image *image)
{
int i;
int npages = (image->size) / PAGE_SIZE;
BUG_ON(image->size % PAGE_SIZE != 0);
for (i = 0; i < npages; i++)
image->text_mapping.pages[i] =
virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}
static int vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
return VM_FAULT_SIGBUS;
vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
get_page(vmf->page);
return 0;
}
static const struct vm_special_mapping text_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
};
static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
long sym_offset;
int ret = -EFAULT;
if (!image)
return VM_FAULT_SIGBUS;
sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
image->sym_vvar_start;
/*
* Sanity check: a symbol offset of zero means that the page
* does not exist for this vdso image, not that the page is at
* offset zero relative to the text mapping. This should be
* impossible here, because sym_offset should only be zero for
* the page past the end of the vvar mapping.
*/
if (sym_offset == 0)
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
} else if (sym_offset == image->sym_hpet_page) {
#ifdef CONFIG_HPET_TIMER
if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
ret = vm_insert_pfn_prot(
vma,
(unsigned long)vmf->virtual_address,
hpet_address >> PAGE_SHIFT,
pgprot_noncached(PAGE_READONLY));
}
#endif
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_pvti_cpu0_va();
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
ret = vm_insert_pfn(
vma,
(unsigned long)vmf->virtual_address,
__pa(pvti) >> PAGE_SHIFT);
}
}
if (ret == 0 || ret == -EBUSY)
return VM_FAULT_NOPAGE;
return VM_FAULT_SIGBUS;
}
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
static struct page *no_pages[] = {NULL};
static struct vm_special_mapping vvar_mapping = {
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.pages = no_pages,
.fault = vvar_fault,
};
struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&image->text_mapping);
&text_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
VM_READ|VM_MAYREAD,
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
VM_PFNMAP,
&vvar_mapping);
if (IS_ERR(vma)) {
@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
goto up_fail;
}
if (image->sym_vvar_page)
ret = remap_pfn_range(vma,
text_start + image->sym_vvar_page,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
#ifdef CONFIG_HPET_TIMER
if (hpet_address && image->sym_hpet_page) {
ret = io_remap_pfn_range(vma,
text_start + image->sym_hpet_page,
hpet_address >> PAGE_SHIFT,
PAGE_SIZE,
pgprot_noncached(PAGE_READONLY));
if (ret)
goto up_fail;
}
#endif
pvti = pvclock_pvti_cpu0_va();
if (pvti && image->sym_pvclock_page) {
ret = remap_pfn_range(vma,
text_start + image->sym_pvclock_page,
__pa(pvti) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
}
up_fail:
if (ret)
current->mm->context.vdso = NULL;
@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
#endif
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
if (static_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/*

View File

@ -16,6 +16,8 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>
int vclocks_used __read_mostly;
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
/* Mark the new vclock used. */
BUILD_BUG_ON(VCLOCK_MAX >= 32);
WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
gtod_write_begin(vdata);
/* copy vsyscall data */
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;

View File

@ -151,12 +151,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection"
/*
* This must be included *after* the definition of ALTERNATIVE due to
* <asm/arch_hweight.h>
*/
#include <asm/cpufeature.h>
/*
* Alternative instructions for different CPU types or capabilities.
*

View File

@ -6,7 +6,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>

View File

@ -1,6 +1,8 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"

View File

@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static inline void __set_bit(long nr, volatile unsigned long *addr)
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
static inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static inline void __change_bit(long nr, volatile unsigned long *addr)
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
static inline void change_bit(long nr, volatile unsigned long *addr)
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
static inline unsigned long __ffs(unsigned long word)
static __always_inline unsigned long __ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
static inline unsigned long ffz(unsigned long word)
static __always_inline unsigned long ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
static inline unsigned long __fls(unsigned long word)
static __always_inline unsigned long __fls(unsigned long word)
{
asm("bsr %1,%0"
: "=r" (word)
@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
static inline int ffs(int x)
static __always_inline int ffs(int x)
{
int r;
@ -434,7 +434,7 @@ static inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
static inline int fls(int x)
static __always_inline int fls(int x)
{
int r;

View File

@ -3,10 +3,11 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;

View File

@ -2,6 +2,7 @@
#define ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
/*

View File

@ -1,288 +1,7 @@
/*
* Defines x86 CPU feature bits
*/
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
#ifndef _ASM_X86_REQUIRED_FEATURES_H
#include <asm/required-features.h>
#endif
#ifndef _ASM_X86_DISABLED_FEATURES_H
#include <asm/disabled-features.h>
#endif
#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
* in /proc/cpuinfo instead of the macro name. If the string is "",
* this feature bit is not displayed in /proc/cpuinfo at all.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
/* (plus FCMOVcc, FCOMI with FPU) */
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
* CPUID levels like 0x6, 0xA etc, word 7.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
#include <asm/processor.h>
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
cpu_has(&boot_cpu_data, bit))
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
/*
* Do not add any more of those clumsy macros - use static_cpu_has_safe() for
* Do not add any more of those clumsy macros - use static_cpu_has() for
* fast paths and boot_cpu_has() otherwise!
*/
#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
extern void warn_pre_alternatives(void);
extern bool __static_cpu_has_safe(u16 bit);
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These are only valid after alternatives have run, but will statically
* patch the target code for additional performance.
* These will statically patch the target code for additional
* performance.
*/
static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
/*
* Catch too early usage of this before alternatives
* have run.
*/
asm_volatile_goto("1: jmp %l[t_warn]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
" .long 0\n" /* no replacement */
" .word %P0\n" /* 1: do replace */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
#endif
asm_volatile_goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
return true;
t_no:
return false;
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
t_warn:
warn_pre_alternatives();
return false;
#endif
#else /* CC_HAVE_ASM_GOTO */
u8 flag;
/* Open-coded due to __stringify() in ALTERNATIVE() */
asm volatile("1: movb $0,%0\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
" .long 3f - .\n"
" .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $1,%0\n"
"4:\n"
".previous\n"
: "=qm" (flag) : "i" (bit));
return flag;
#endif /* CC_HAVE_ASM_GOTO */
}
#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
__builtin_constant_p(bit) ? \
__static_cpu_has(bit) : \
boot_cpu_has(bit) \
)
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
asm_volatile_goto("1: jmp %l[t_dynamic]\n"
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
: : t_dynamic, t_no);
".section .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum],%[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
t_no:
return false;
t_dynamic:
return __static_cpu_has_safe(bit);
#else
u8 flag;
/* Open-coded due to __stringify() in ALTERNATIVE() */
asm volatile("1: movb $2,%0\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 3f - .\n" /* repl offset */
" .word %P2\n" /* always replace */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $0,%0\n"
"4:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 5f - .\n" /* repl offset */
" .word %P1\n" /* feature bit */
" .byte 4b - 3b\n" /* src len */
" .byte 6f - 5f\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"5: movb $1,%0\n"
"6:\n"
".previous\n"
: "=qm" (flag)
: "i" (bit), "i" (X86_FEATURE_ALWAYS));
return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
#endif /* CC_HAVE_ASM_GOTO */
}
#define static_cpu_has_safe(bit) \
#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
_static_cpu_has_safe(bit) \
_static_cpu_has(bit) \
)
#else
/*
* gcc 3.x is too stupid to do the static test; fall back to dynamic.
* Fall back to dynamic for gcc versions which don't support asm goto. Should be
* a minority now anyway.
*/
#define static_cpu_has(bit) boot_cpu_has(bit)
#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)

View File

@ -0,0 +1,300 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
#ifndef _ASM_X86_REQUIRED_FEATURES_H
#include <asm/required-features.h>
#endif
#ifndef _ASM_X86_DISABLED_FEATURES_H
#include <asm/disabled-features.h>
#endif
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
* in /proc/cpuinfo instead of the macro name. If the string is "",
* this feature bit is not displayed in /proc/cpuinfo at all.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
/* (plus FCMOVcc, FCOMI with FPU) */
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
* CPUID levels like 0x6, 0xA etc, word 7.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -98,4 +98,27 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
#define AR_TYPE_RWDATA (1 * (1 << 9))
#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
#define AR_TYPE_XOCODE (4 * (1 << 9))
#define AR_TYPE_XRCODE (5 * (1 << 9))
#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
#define AR_TYPE_MASK (7 * (1 << 9))
#define AR_DPL0 (0 * (1 << 13))
#define AR_DPL3 (3 * (1 << 13))
#define AR_DPL_MASK (3 * (1 << 13))
#define AR_A (1 << 8) /* "Accessed" */
#define AR_S (1 << 12) /* If clear, "System" segment */
#define AR_P (1 << 15) /* "Present" */
#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
#define AR_L (1 << 21) /* "Long mode" for code segments */
#define AR_DB (1 << 22) /* D/B, effect depends on type */
#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
#endif /* _ASM_X86_DESC_DEFS_H */

View File

@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap
#define dmi_remap ioremap
#define dmi_remap ioremap_cache
#define dmi_unmap iounmap
#endif /* _ASM_X86_DMI_H */

View File

@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;
extern pte_t *kmap_pte;
extern pgprot_t kmap_prot;
#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);

View File

@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
/*
* High level FPU state handling functions:
@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
*/
static __always_inline __pure bool use_eager_fpu(void)
{
return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
return static_cpu_has(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
return static_cpu_has_safe(X86_FEATURE_XSAVE);
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has_safe(X86_FEATURE_FXSR);
return static_cpu_has(X86_FEATURE_FXSR);
}
/*
@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
fpu.preload = new_fpu->fpstate_active &&
fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
new_fpu->fpstate_active &&
(use_eager_fpu() || new_fpu->counter > 5);
if (old_fpu->fpregs_active) {

View File

@ -1,23 +1,44 @@
#ifdef __ASSEMBLY__
#ifndef _ASM_X86_FRAME_H
#define _ASM_X86_FRAME_H
#include <asm/asm.h>
/* The annotation hides the frame from the unwinder and makes it look
like a ordinary ebp save/restore. This avoids some special cases for
frame pointer later */
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
__ASM_SIZE(push,) %__ASM_REG(bp)
__ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
.endm
.macro ENDFRAME
__ASM_SIZE(pop,) %__ASM_REG(bp)
.endm
#else
.macro FRAME
.endm
.macro ENDFRAME
.endm
#endif
/*
* These are stack frame creation macros. They should be used by every
* callable non-leaf asm function to make kernel stack traces more reliable.
*/
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_FRAME_POINTER
#ifdef __ASSEMBLY__
.macro FRAME_BEGIN
push %_ASM_BP
_ASM_MOV %_ASM_SP, %_ASM_BP
.endm
.macro FRAME_END
pop %_ASM_BP
.endm
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
"push %" _ASM_BP "\n" \
_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
#define FRAME_END "pop %" _ASM_BP "\n"
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
#endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_FRAME_H */

View File

@ -53,7 +53,7 @@
#define IMR_MASK (IMR_ALIGN - 1)
int imr_add_range(phys_addr_t base, size_t size,
unsigned int rmask, unsigned int wmask, bool lock);
unsigned int rmask, unsigned int wmask);
int imr_remove_range(phys_addr_t base, size_t size);

View File

@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void)
cpu_relax();
}
static inline void
__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
{
/*
* Subtle. In the case of the 'never do double writes' workaround
* we have to lock out interrupts to be safe. As we don't care
* of the value read we use an atomic rmw access to avoid costly
* cli/sti. Otherwise we use an even cheaper single atomic write
* to the APIC.
*/
unsigned int cfg;
/*
* Wait for idle.
*/
__xapic_wait_icr_idle();
/*
* No need to touch the target chip field
*/
cfg = __prepare_ICR(shortcut, vector, dest);
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
native_apic_mem_write(APIC_ICR, cfg);
}
void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
static inline void
__default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
{
unsigned long cfg;
/*
* Wait for idle.
*/
if (unlikely(vector == NMI_VECTOR))
safe_apic_wait_icr_idle();
else
__xapic_wait_icr_idle();
/*
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
native_apic_mem_write(APIC_ICR2, cfg);
/*
* program the ICR
*/
cfg = __prepare_ICR(0, vector, dest);
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
native_apic_mem_write(APIC_ICR, cfg);
}
void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);

View File

@ -1,7 +1,7 @@
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H
#include <asm/processor.h>
#include <asm/cpufeature.h>
static inline bool arch_irq_work_has_interrupt(void)
{

View File

@ -135,6 +135,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;

View File

@ -3,6 +3,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
#include <linux/initrd.h>
#define native_rdmsr(msr, val1, val2) \
do { \
@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
static inline unsigned long get_initrd_start(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
return initrd_start;
#else
return 0;
#endif
}
static inline unsigned long get_initrd_start_addr(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
#ifdef CONFIG_X86_32
unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
return (unsigned long)__pa_nodebug(*initrd_start_p);
#else
return get_initrd_start();
#endif
#else /* CONFIG_BLK_DEV_INITRD */
return 0;
#endif
}
#endif /* _ASM_X86_MICROCODE_H */

View File

@ -40,7 +40,6 @@ struct extended_sigtable {
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \

View File

@ -19,7 +19,8 @@ typedef struct {
#endif
struct mutex lock;
void __user *vdso;
void __user *vdso; /* vdso base address */
const struct vdso_image *vdso_image; /* vdso image in use */
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;

View File

@ -1,7 +1,12 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
/* CPU model specific register (MSR) numbers */
/*
* CPU model specific register (MSR) numbers.
*
* Do not add new entries to this file unless the definitions are shared
* between multiple compilation units.
*/
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */

View File

@ -3,6 +3,8 @@
#include <linux/sched.h>
#include <asm/cpufeature.h>
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4

View File

@ -13,7 +13,7 @@ struct vm86;
#include <asm/types.h>
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
@ -24,7 +24,6 @@ struct vm86;
#include <asm/fpu/types.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/math64.h>
@ -300,10 +299,13 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
#ifdef CONFIG_X86_32
/*
* Space for the temporary SYSENTER stack:
* Space for the temporary SYSENTER stack.
*/
unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
#endif
} ____cacheline_aligned;

View File

@ -7,12 +7,23 @@
void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
void entry_SYSCALL_compat(void);
#endif
#ifdef CONFIG_X86_32
void entry_INT80_32(void);
void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
void __begin_SYSENTER_singlestep_region(void);
void __end_SYSENTER_singlestep_region(void);
#endif
#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
#endif
void x86_configure_nx(void);
void x86_report_nx(void);

View File

@ -13,7 +13,6 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);

View File

@ -15,7 +15,7 @@
#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca

View File

@ -16,7 +16,6 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;

View File

@ -49,7 +49,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <linux/atomic.h>
struct thread_info {
@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
/* work to do in syscall_trace_enter() */
/*
* work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
* enter_from_user_mode()
*/
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do on any return to user space */

View File

@ -5,8 +5,57 @@
#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
static inline void __invpcid(unsigned long pcid, unsigned long addr,
unsigned long type)
{
struct { u64 d[2]; } desc = { { pcid, addr } };
/*
* The memory clobber is because the whole point is to invalidate
* stale TLB entries and, especially if we're flushing global
* mappings, we don't want the compiler to reorder any subsequent
* memory accesses before the TLB flush.
*
* The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
* invpcid (%rcx), %rax in long mode.
*/
asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
: : "m" (desc), "a" (type), "c" (&desc) : "memory");
}
#define INVPCID_TYPE_INDIV_ADDR 0
#define INVPCID_TYPE_SINGLE_CTXT 1
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
#define INVPCID_TYPE_ALL_NON_GLOBAL 3
/* Flush all mappings for a given pcid and addr, not including globals. */
static inline void invpcid_flush_one(unsigned long pcid,
unsigned long addr)
{
__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
}
/* Flush all mappings for a given PCID, not including globals. */
static inline void invpcid_flush_single_context(unsigned long pcid)
{
__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
}
/* Flush all mappings, including globals, for all PCIDs. */
static inline void invpcid_flush_all(void)
{
__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
}
/* Flush all mappings for all PCIDs except globals. */
static inline void invpcid_flush_all_nonglobals(void)
{
__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
}
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
*/
invpcid_flush_all();
return;
}
/*
* Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can

View File

@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void)
return rdtsc();
}
extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);

View File

@ -8,7 +8,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
/*

View File

@ -13,9 +13,6 @@ struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
/* text_mapping.pages is big enough for data/size page pointers */
struct vm_special_mapping text_mapping;
unsigned long alt, alt_len;
long sym_vvar_start; /* Negative offset to the vvar area */

View File

@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;

View File

@ -256,7 +256,7 @@ struct sigcontext_64 {
__u16 cs;
__u16 gs;
__u16 fs;
__u16 __pad0;
__u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
@ -341,9 +341,37 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
/*
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
* Linux saved and restored fs and gs in these slots. This
* was counterproductive, as fsbase and gsbase were never
* saved, so arch_prctl was presumably unreliable.
*
* These slots should never be reused without extreme caution:
*
* - Some DOSEMU versions stash fs and gs in these slots manually,
* thus overwriting anything the kernel expects to be preserved
* in these slots.
*
* - If these slots are ever needed for any other purpose,
* there is some risk that very old 64-bit binaries could get
* confused. I doubt that many such binaries still work,
* though, since the same patch in 2.5.64 also removed the
* 64-bit set_thread_area syscall, so it appears that there
* is no TLS API beyond modify_ldt that works in both pre-
* and post-2.5.64 kernels.
*
* If the kernel ever adds explicit fs, gs, fsbase, and gsbase
* save/restore, it will most likely need to be opt-in and use
* different context slots.
*/
__u16 gs;
__u16 fs;
__u16 __pad0;
union {
__u16 ss; /* If UC_SIGCONTEXT_SS */
__u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
};
__u64 err;
__u64 trapno;
__u64 oldmask;

View File

@ -1,11 +1,54 @@
#ifndef _ASM_X86_UCONTEXT_H
#define _ASM_X86_UCONTEXT_H
#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
* information in the memory layout pointed
* by the fpstate pointer in the ucontext's
* sigcontext struct (uc_mcontext).
*/
/*
* Indicates the presence of extended state information in the memory
* layout pointed by the fpstate pointer in the ucontext's sigcontext
* struct (uc_mcontext).
*/
#define UC_FP_XSTATE 0x1
#ifdef __x86_64__
/*
* UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
* kernels that save SS in the sigcontext. All kernels that set
* UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
* regardless of SS (i.e. they implement espfix).
*
* Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
* when delivering a signal that came from 64-bit code.
*
* Sigreturn restores SS as follows:
*
* if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
* saved CS is not 64-bit)
* new SS = saved SS (will fail IRET and signal if invalid)
* else
* new SS = a flat 32-bit data segment
*
* This behavior serves three purposes:
*
* - Legacy programs that construct a 64-bit sigcontext from scratch
* with zero or garbage in the SS slot (e.g. old CRIU) and call
* sigreturn will still work.
*
* - Old DOSEMU versions sometimes catch a signal from a segmented
* context, delete the old SS segment (with modify_ldt), and change
* the saved CS to a 64-bit segment. These DOSEMU versions expect
* sigreturn to send them back to 64-bit mode without killing them,
* despite the fact that the SS selector when the signal was raised is
* no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
* will fix up SS for these DOSEMU versions.
*
* - Old and new programs that catch a signal and return without
* modifying the saved context will end up in exactly the state they
* started in, even if they were running in a segmented context when
* the signal was raised.. Old kernels would lose track of the
* previous SS value.
*/
#define UC_SIGCONTEXT_SS 0x2
#define UC_STRICT_RESTORE_SS 0x4
#endif
#include <asm-generic/ucontext.h>

View File

@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
static void _flat_send_IPI_mask(unsigned long mask, int vector)
{
unsigned long flags;

View File

@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}

View File

@ -18,6 +18,66 @@
#include <asm/proto.h>
#include <asm/ipi.h>
void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
{
/*
* Subtle. In the case of the 'never do double writes' workaround
* we have to lock out interrupts to be safe. As we don't care
* of the value read we use an atomic rmw access to avoid costly
* cli/sti. Otherwise we use an even cheaper single atomic write
* to the APIC.
*/
unsigned int cfg;
/*
* Wait for idle.
*/
__xapic_wait_icr_idle();
/*
* No need to touch the target chip field
*/
cfg = __prepare_ICR(shortcut, vector, dest);
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
native_apic_mem_write(APIC_ICR, cfg);
}
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
{
unsigned long cfg;
/*
* Wait for idle.
*/
if (unlikely(vector == NMI_VECTOR))
safe_apic_wait_icr_idle();
else
__xapic_wait_icr_idle();
/*
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
native_apic_mem_write(APIC_ICR2, cfg);
/*
* program the ICR
*/
cfg = __prepare_ICR(0, vector, dest);
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
native_apic_mem_write(APIC_ICR, cfg);
}
void default_send_IPI_single_phys(int cpu, int vector)
{
unsigned long flags;

View File

@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);

Some files were not shown because too many files have changed in this diff Show More