Fix AMDTEE memory leak in amdtee_open_session()
-----BEGIN PGP SIGNATURE-----

iQJOBAABCgA4FiEEK3Mh2f1G6lqRPfDvpvbt8nX18hUFAl5X7PcaHGplbnMud2lr
bGFuZGVyQGxpbmFyby5vcmcACgkQpvbt8nX18hXjKA/9HGAnIBnn2poGnrrIQ+b9
+Txa2DawaS/HgMm3WsCqWWY5pkSeB4tVfyOA+k7NcAlW8XtX5jvDlZRHz/t2wIj5
7ZS/HpBmOzpdeEfvKe/+lOyL81Ugvwitpxy/Az2KbtZtoTNekpCP2XqBvxr7WCkt
nS28OWpSJZlY7bhahIsb20rwL/QIpkhiQNITlmzTuaeoWYvwrApuOeSKHbBNhqez
cBP3aYK1aNvCKfCV6m08LW/09qrZ4c+rk9ZfA5LxPZUIsMKgpez/AkU7lhtcYh0U
YS3FLpo6w8CG91H3lBihoTj8mh5WlNj2H00Vd6wuPfwOOkAtDxyOAHi5MasArmIK
eeI+W1NKiAWavZ2ULlPHb6g1ekSZ/r0zuffe7MAw37yFWg0AuZq3e4tQ/o6i3e6F
CznIJqh0tFmDsdF387XPsIuWYJeTQ0rQvxRDjYeIGmnLGFXb2gfsaU06bMCzO4Mb
fXoGsFmr77NuL3a1kdnbOuODkMCvHArKmn/hjzCksDE1DWeQVFux1LZEV/VbRBVR
zNrWcDzi+fVgpoPcqJ3IRoTVdDYGPWv/F8182CNAwdOFmgAmZWtHosJse+zTLUJL
xKIYw2nNhC3/OoAt3DZ+Dq7GVrvH+GrmAS0C+CJf08naawYcKCB5C478yaw85umt
fJt0s+9N02Q3g+rsUalSZ9Y=
=8Bsc
-----END PGP SIGNATURE-----

Merge tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee into arm/fixes

Fix AMDTEE memory leak in amdtee_open_session()

* tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee: (344 commits)
  tee: amdtee: fix memory leak in amdtee_open_session()
  Linux 5.6-rc2
  ext4: improve explanation of a mount failure caused by a misconfigured kernel
  Input: cyapa - replace zero-length array with flexible-array member
  Input: tca6416-keypad - replace zero-length array with flexible-array member
  Input: gpio_keys_polled - replace zero-length array with flexible-array member
  IB/mlx5: Use div64_u64 for num_var_hw_entries calculation
  nvme: fix the parameter order for nvme_get_log in nvme_get_fw_slot_info
  nvme/pci: move cqe check after device shutdown
  nvme: prevent warning triggered by nvme_stop_keep_alive
  nvme/tcp: fix bug on double requeue when send fails
  cifs: make sure we do not overflow the max EA buffer size
  cifs: enable change notification for SMB2.1 dialect
  netdevice.h: fix all kernel-doc and Sphinx warnings
  net: dsa: tag_ar9331: Make sure there is headroom for tag
  net: dsa: tag_qca: Make sure there is headroom for tag
  net, ip6_tunnel: enhance tunnel locate with link check
  net/smc: no peer ID in CLC decline for SMCD
  net/smc: transfer fasync_list in case of fallback
  net: hns3: fix a copying IPv6 address error in hclge_fd_get_flow_tuples()
  ...

Link: https://lore.kernel.org/r/20200227165205.GA7926@jade
Signed-off-by: Olof Johansson <olof@lixom.net>
commit f9a15f39e5
@@ -1,9 +1,10 @@
Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller

Required properties:
- compatible:
    ilitek,ili210x for ILI210x
    ilitek,ili2117 for ILI2117
    ilitek,ili2120 for ILI2120
    ilitek,ili251x for ILI251x

- reg: The I2C address of the device
@@ -71,9 +71,13 @@ b) Example for device tree::
        ipmb@10 {
                compatible = "ipmb-dev";
                reg = <0x10>;
                i2c-protocol;
        };
};

If xmit of data to be done using raw i2c block vs smbus
then "i2c-protocol" needs to be defined as above.

2) Manually from Linux::

	modprobe ipmb-dev-int
@@ -1,9 +1,11 @@
==================
Guest halt polling
==================

The cpuidle_haltpoll driver, with the haltpoll governor, allows
the guest vcpus to poll for a specified amount of time before
halting.

This provides the following benefits to host side polling:

1) The POLL flag is set while polling is performed, which allows
@@ -29,18 +31,21 @@ Module Parameters
The haltpoll governor has 5 tunable module parameters:

1) guest_halt_poll_ns:

Maximum amount of time, in nanoseconds, that polling is
performed before halting.

Default: 200000

2) guest_halt_poll_shrink:

Division factor used to shrink per-cpu guest_halt_poll_ns when
wakeup event occurs after the global guest_halt_poll_ns.

Default: 2

3) guest_halt_poll_grow:

Multiplication factor used to grow per-cpu guest_halt_poll_ns
when event occurs after per-cpu guest_halt_poll_ns
but before global guest_halt_poll_ns.
@@ -48,6 +53,7 @@ but before global guest_halt_poll_ns.
Default: 2

4) guest_halt_poll_grow_start:

The per-cpu guest_halt_poll_ns eventually reaches zero
in case of an idle system. This value sets the initial
per-cpu guest_halt_poll_ns when growing. This can
@@ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).

Default: Y

The module parameters can be set from the debugfs files in:
The module parameters can be set from the debugfs files in::

	/sys/module/haltpoll/parameters/

@@ -74,5 +80,5 @@ Further Notes
=============

- Care should be taken when setting the guest_halt_poll_ns parameter as a
large value has the potential to drive the cpu usage to 100% on a machine which
would be almost entirely idle otherwise.
  large value has the potential to drive the cpu usage to 100% on a machine
  which would be almost entirely idle otherwise.
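As a rough illustration of the tunables above (not part of this patch), the
parameters can be changed at run time from inside the guest by writing the
corresponding parameter file; the exact file name below is an assumption built
from the parameter name and the /sys/module/haltpoll/parameters/ directory
quoted above::

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* Assumed path: <parameters dir>/<parameter name> from the text above. */
	#define HALT_POLL_NS "/sys/module/haltpoll/parameters/guest_halt_poll_ns"

	static int set_guest_halt_poll_ns(const char *value)
	{
		int fd = open(HALT_POLL_NS, O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, value, strlen(value)) < 0) {
			close(fd);
			return -1;
		}
		return close(fd);
	}

	int main(void)
	{
		/* Double the default 200000 ns polling window. */
		return set_guest_halt_poll_ns("400000") ? 1 : 0;
	}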
@@ -8,7 +8,9 @@ Linux Virtualization Support
   :maxdepth: 2

   kvm/index
   uml/user_mode_linux
   paravirt_ops
   guest-halt-polling

.. only:: html and subproject
File diff suppressed because it is too large
@@ -1,4 +1,8 @@
* Internal ABI between the kernel and HYP
.. SPDX-License-Identifier: GPL-2.0

=======================================
Internal ABI between the kernel and HYP
=======================================

This file documents the interaction between the Linux kernel and the
hypervisor layer when running Linux as a hypervisor (for example
@@ -19,25 +23,31 @@ and only act on individual CPUs.
Unless specified otherwise, any built-in hypervisor must implement
these functions (see arch/arm{,64}/include/asm/virt.h):

* r0/x0 = HVC_SET_VECTORS
  r1/x1 = vectors
* ::

    r0/x0 = HVC_SET_VECTORS
    r1/x1 = vectors

  Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
  must be a physical address, and respect the alignment requirements
  of the architecture. Only implemented by the initial stubs, not by
  Linux hypervisors.

* r0/x0 = HVC_RESET_VECTORS
* ::

    r0/x0 = HVC_RESET_VECTORS

  Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initials
  stubs' exception vector value. This effectively disables an existing
  hypervisor.

* r0/x0 = HVC_SOFT_RESTART
  r1/x1 = restart address
  x2 = x0's value when entering the next payload (arm64)
  x3 = x1's value when entering the next payload (arm64)
  x4 = x2's value when entering the next payload (arm64)
* ::

    r0/x0 = HVC_SOFT_RESTART
    r1/x1 = restart address
    x2 = x0's value when entering the next payload (arm64)
    x3 = x1's value when entering the next payload (arm64)
    x4 = x2's value when entering the next payload (arm64)

  Mask all exceptions, disable the MMU, move the arguments into place
  (arm64 only), and jump to the restart address while at HYP/EL2. This
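As an aside (not part of this patch), the register convention spelled out above
can be exercised from arm64 kernel code with a small inline-assembly sketch.
HVC_SET_VECTORS is the constant from arch/arm{,64}/include/asm/virt.h referenced
earlier; the stub expects the function ID in x0 and the argument in x1. Treat
this as illustrative only::

	#include <asm/virt.h>	/* HVC_SET_VECTORS */

	/*
	 * Ask the EL2 stub to install a new vector table.  'vectors_pa' must
	 * be a physical address that meets the architecture's alignment
	 * requirements, as the text above requires.
	 */
	static inline void stub_hvc_set_vectors(unsigned long vectors_pa)
	{
		register unsigned long x0 asm("x0") = HVC_SET_VECTORS;
		register unsigned long x1 asm("x1") = vectors_pa;

		asm volatile("hvc #0"
			     : "+r" (x0)
			     : "r" (x1)
			     : "memory");
	}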
Documentation/virt/kvm/arm/index.rst (new file, 12 lines)
@@ -0,0 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0

===
ARM
===

.. toctree::
   :maxdepth: 2

   hyp-abi
   psci
   pvtime
@ -1,3 +1,9 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================================
|
||||
Power State Coordination Interface (PSCI)
|
||||
=========================================
|
||||
|
||||
KVM implements the PSCI (Power State Coordination Interface)
|
||||
specification in order to provide services such as CPU on/off, reset
|
||||
and power-off to the guest.
|
||||
@ -30,32 +36,42 @@ The following register is defined:
|
||||
- Affects the whole VM (even if the register view is per-vcpu)
|
||||
|
||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
|
||||
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_1 in [1].
|
||||
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_1 in [1].
|
||||
|
||||
Accepted values are:
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
|
||||
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
|
||||
KVM does not offer
|
||||
firmware support for the workaround. The mitigation status for the
|
||||
guest is unknown.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
|
||||
The workaround HVC call is
|
||||
available to the guest and required for the mitigation.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
|
||||
The workaround HVC call
|
||||
is available to the guest, but it is not needed on this VCPU.
|
||||
|
||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
|
||||
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_2 in [1].
|
||||
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
|
||||
|
||||
Accepted values are:
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
|
||||
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
|
||||
A workaround is not
|
||||
available. KVM does not offer firmware support for the workaround.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
|
||||
The workaround state is
|
||||
unknown. KVM does not offer firmware support for the workaround.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
|
||||
The workaround is available,
|
||||
and can be disabled by a vCPU. If
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
|
||||
this vCPU.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
|
||||
always active on this vCPU or it is not needed.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
|
||||
The workaround is always active on this vCPU or it is not needed.
|
||||
|
||||
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
||||
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
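As an illustrative aside (not part of this patch), these firmware
pseudo-registers are read and written with the ordinary KVM_GET_ONE_REG /
KVM_SET_ONE_REG vcpu ioctls. A minimal userspace sketch, assuming 'vcpu_fd'
was obtained via KVM_CREATE_VCPU and that the register ID macro is available
from the arm64 uapi headers::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	/*
	 * On success *state holds one of the
	 * KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_* values described above.
	 */
	static int get_wa1_state(int vcpu_fd, uint64_t *state)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
			.addr = (uintptr_t)state,
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}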
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============================================
|
||||
ARM Virtual Interrupt Translation Service (ITS)
|
||||
===============================================
|
||||
|
||||
@ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have
|
||||
a separate, non-overlapping MMIO region.
|
||||
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Groups
|
||||
======
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
-------------------------
|
||||
|
||||
Attributes:
|
||||
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GICv3 ITS
|
||||
control register frame.
|
||||
This address needs to be 64K aligned and the region covers 128K.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address
|
||||
-EEXIST: Address already configured
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENODEV: Incorrect attribute or the ITS is not supported.
|
||||
|
||||
======= =================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address
|
||||
-EEXIST Address already configured
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENODEV Incorrect attribute or the ITS is not supported.
|
||||
======= =================================================
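As an illustrative aside (not part of this patch), programming this group from
userspace is a single KVM_SET_DEVICE_ATTR ioctl on the ITS device fd; 'its_fd'
below is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_ITS::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	/* 'base' must be 64K aligned, as required above. */
	static int its_set_base(int its_fd, uint64_t base)
	{
		struct kvm_device_attr attr = {
			.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
			.attr  = KVM_VGIC_ITS_ADDR_TYPE,
			.addr  = (uintptr_t)&base,
		};

		return ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr);
	}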
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
-------------------------
|
||||
|
||||
Attributes:
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the ITS, no additional parameter in
|
||||
@ -58,16 +71,21 @@ Groups:
|
||||
"ITS Restore Sequence".
|
||||
|
||||
Errors:
|
||||
-ENXIO: ITS not properly configured as required prior to setting
|
||||
this attribute
|
||||
-ENOMEM: Memory shortage when allocating ITS internal data
|
||||
-EINVAL: Inconsistent restored data
|
||||
-EFAULT: Invalid guest ram access
|
||||
-EBUSY: One or more VCPUS are running
|
||||
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||
state is not available
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||
======= ==========================================================
|
||||
-ENXIO ITS not properly configured as required prior to setting
|
||||
this attribute
|
||||
-ENOMEM Memory shortage when allocating ITS internal data
|
||||
-EINVAL Inconsistent restored data
|
||||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||
state is not available
|
||||
======= ==========================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||
-----------------------------
|
||||
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes the offset of the
|
||||
ITS register, relative to the ITS control frame base address
|
||||
@ -78,6 +96,7 @@ Groups:
|
||||
be accessed with full length.
|
||||
|
||||
Writes to read-only registers are ignored by the kernel except for:
|
||||
|
||||
- GITS_CREADR. It must be restored otherwise commands in the queue
|
||||
will be re-executed after restoring CWRITER. GITS_CREADR must be
|
||||
restored before restoring the GITS_CTLR which is likely to enable the
|
||||
@ -91,30 +110,36 @@ Groups:
|
||||
|
||||
For other registers, getting or setting a register has the same
|
||||
effect as reading/writing the register on real hardware.
|
||||
Errors:
|
||||
-ENXIO: Offset does not correspond to any supported register
|
||||
-EFAULT: Invalid user pointer for attr->addr
|
||||
-EINVAL: Offset is not 64-bit aligned
|
||||
-EBUSY: one or more VCPUS are running
|
||||
|
||||
ITS Restore Sequence:
|
||||
-------------------------
|
||||
Errors:
|
||||
|
||||
======= ====================================================
|
||||
-ENXIO Offset does not correspond to any supported register
|
||||
-EFAULT Invalid user pointer for attr->addr
|
||||
-EINVAL Offset is not 64-bit aligned
|
||||
-EBUSY one or more VCPUS are running
|
||||
======= ====================================================
|
||||
|
||||
ITS Restore Sequence:
|
||||
---------------------
|
||||
|
||||
The following ordering must be followed when restoring the GIC and the ITS:
|
||||
|
||||
a) restore all guest memory and create vcpus
|
||||
b) restore all redistributors
|
||||
c) provide the ITS base address
|
||||
(KVM_DEV_ARM_VGIC_GRP_ADDR)
|
||||
d) restore the ITS in the following order:
|
||||
1. Restore GITS_CBASER
|
||||
2. Restore all other GITS_ registers, except GITS_CTLR!
|
||||
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
||||
4. Restore GITS_CTLR
|
||||
|
||||
1. Restore GITS_CBASER
|
||||
2. Restore all other ``GITS_`` registers, except GITS_CTLR!
|
||||
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
||||
4. Restore GITS_CTLR
|
||||
|
||||
Then vcpus can be started.
|
||||
|
||||
ITS Table ABI REV0:
|
||||
-------------------
|
||||
ITS Table ABI REV0:
|
||||
-------------------
|
||||
|
||||
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
|
||||
not support a virtual GICv4 with support for direct injection of virtual
|
||||
@ -125,12 +150,13 @@ Then vcpus can be started.
|
||||
entries in the collection are listed in no particular order.
|
||||
All entries are 8 bytes.
|
||||
|
||||
Device Table Entry (DTE):
|
||||
Device Table Entry (DTE)::
|
||||
|
||||
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
||||
values: | V | next | ITT_addr | Size |
|
||||
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
||||
values: | V | next | ITT_addr | Size |
|
||||
|
||||
where:
|
||||
|
||||
where;
|
||||
- V indicates whether the entry is valid. If not, other fields
|
||||
are not meaningful.
|
||||
- next: equals to 0 if this entry is the last one; otherwise it
|
||||
@ -140,32 +166,34 @@ Then vcpus can be started.
|
||||
- Size specifies the supported number of bits for the EventID,
|
||||
minus one
|
||||
|
||||
Collection Table Entry (CTE):
|
||||
Collection Table Entry (CTE)::
|
||||
|
||||
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
||||
values: | V | RES0 | RDBase | ICID |
|
||||
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
||||
values: | V | RES0 | RDBase | ICID |
|
||||
|
||||
where:
|
||||
|
||||
- V indicates whether the entry is valid. If not, other fields are
|
||||
not meaningful.
|
||||
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
|
||||
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
|
||||
- ICID is the collection ID
|
||||
|
||||
Interrupt Translation Entry (ITE):
|
||||
Interrupt Translation Entry (ITE)::
|
||||
|
||||
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
||||
values: | next | pINTID | ICID |
|
||||
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
||||
values: | next | pINTID | ICID |
|
||||
|
||||
where:
|
||||
|
||||
- next: equals to 0 if this entry is the last one; otherwise it corresponds
|
||||
to the EventID offset to the next ITE capped by 2^16 -1.
|
||||
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
|
||||
and other fields are not meaningful.
|
||||
- ICID is the collection ID
|
||||
|
||||
ITS Reset State:
|
||||
----------------
|
||||
ITS Reset State:
|
||||
----------------
|
||||
|
||||
RESET returns the ITS to the same state that it was when first created and
|
||||
initialized. When the RESET command returns, the following things are
|
@ -1,9 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================================================
|
||||
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
|
||||
==============================================================
|
||||
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
||||
- KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
||||
|
||||
Only one VGIC instance may be instantiated through this API. The created VGIC
|
||||
will act as the VM interrupt controller, requiring emulated user-space devices
|
||||
@ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GICv3 distributor
|
||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||
@ -29,21 +33,25 @@ Groups:
|
||||
This address needs to be 64K aligned.
|
||||
|
||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
|
||||
The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
|
||||
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
||||
values: | count | base | flags | index
|
||||
The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
|
||||
|
||||
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
||||
values: | count | base | flags | index
|
||||
|
||||
- index encodes the unique redistributor region index
|
||||
- flags: reserved for future use, currently 0
|
||||
- base field encodes bits [51:16] of the guest physical base address
|
||||
of the first redistributor in the region.
|
||||
- count encodes the number of redistributors in the region. Must be
|
||||
greater than 0.
|
||||
|
||||
There are two 64K pages for each redistributor in the region and
|
||||
redistributors are laid out contiguously within the region. Regions
|
||||
are filled with redistributors in the index order. The sum of all
|
||||
region count fields must be greater than or equal to the number of
|
||||
VCPUs. Redistributor regions must be registered in the incremental
|
||||
index order, starting from index 0.
|
||||
|
||||
The characteristics of a specific redistributor region can be read
|
||||
by presetting the index field in the attr data.
|
||||
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||
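As an illustrative aside (not part of this patch), the __u64 layout above can
be packed by hand; the shifts and masks below are written out explicitly
rather than taken from kernel macros, so treat this as a sketch::

	#include <stdint.h>

	/*
	 * bits [63:52] hold count, bits [51:16] hold bits [51:16] of the
	 * base GPA, bits [15:12] hold flags (currently 0), and bits [11:0]
	 * hold the region index.
	 */
	static uint64_t redist_region_attr(uint64_t base_gpa, uint64_t count,
					   uint64_t index)
	{
		return (count << 52) |
		       (base_gpa & 0x000fffffffff0000ULL) |
		       (index & 0xfff);
	}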
@ -52,23 +60,27 @@ Groups:
|
||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address, bad redistributor region
|
||||
|
||||
======= =============================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address, bad redistributor region
|
||||
count/index, mixed redistributor region attribute usage
|
||||
-EEXIST: Address already configured
|
||||
-ENOENT: Attempt to read the characteristics of a non existing
|
||||
-EEXIST Address already configured
|
||||
-ENOENT Attempt to read the characteristics of a non existing
|
||||
redistributor region
|
||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
||||
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||
or hardware support is missing.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
======= =============================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
||||
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | mpidr | offset |
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | mpidr | offset |
|
||||
|
||||
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
|
||||
__u32 value. 64-bit registers must be accessed by separately accessing the
|
||||
@ -93,7 +105,8 @@ Groups:
|
||||
redistributor is accessed. The mpidr is ignored for the distributor.
|
||||
|
||||
The mpidr encoding is based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
@ -148,24 +161,30 @@ Groups:
|
||||
ignored.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
|
||||
====== =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
====== =====================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
||||
values: | mpidr | RES | instr |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
||||
values: | mpidr | RES | instr |
|
||||
|
||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
The instr field encodes the system register to access based on the fields
|
||||
defined in the A64 instruction set encoding for system register access
|
||||
(RES means the bits are reserved for future use and should be zero):
|
||||
(RES means the bits are reserved for future use and should be zero)::
|
||||
|
||||
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
|
||||
| Op 0 | Op1 | CRn | CRm | Op2 |
|
||||
@ -178,26 +197,35 @@ Groups:
|
||||
|
||||
CPU interface registers access is not implemented for AArch32 mode.
|
||||
Error -ENXIO is returned when accessed in AArch32 mode.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: VCPU is running
|
||||
-EINVAL: Invalid mpidr or register value supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY VCPU is running
|
||||
-EINVAL Invalid mpidr or register value supplied
|
||||
======= =====================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||
|
||||
kvm_device_attr.addr points to a __u32 value.
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value set is out of the expected range
|
||||
-EBUSY: Value has already be set.
|
||||
|
||||
======= ======================================
|
||||
-EINVAL Value set is out of the expected range
|
||||
-EBUSY Value has already been set.
|
||||
======= ======================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC, no additional parameter in
|
||||
kvm_device_attr.addr.
|
||||
@ -205,20 +233,26 @@ Groups:
|
||||
save all LPI pending bits into guest RAM pending tables.
|
||||
|
||||
The first kB of the pending table is not altered by this operation.
|
||||
|
||||
Errors:
|
||||
-ENXIO: VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV: no online VCPU
|
||||
-ENOMEM: memory shortage when allocating vgic internal data
|
||||
-EFAULT: Invalid guest ram access
|
||||
-EBUSY: One or more VCPUS are running
|
||||
|
||||
======= ========================================================
|
||||
-ENXIO VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV no online VCPU
|
||||
-ENOMEM memory shortage when allocating vgic internal data
|
||||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
======= ========================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes the following values:
|
||||
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
||||
values: | mpidr | info | vINTID |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes the following values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
||||
values: | mpidr | info | vINTID |
|
||||
|
||||
The vINTID specifies which set of IRQs is reported on.
|
||||
|
||||
@ -228,6 +262,7 @@ Groups:
|
||||
VGIC_LEVEL_INFO_LINE_LEVEL:
|
||||
Get/Set the input level of the IRQ line for a set of 32 contiguously
|
||||
numbered interrupts.
|
||||
|
||||
vINTID must be a multiple of 32.
|
||||
|
||||
kvm_device_attr.addr points to a __u32 value which will contain a
|
||||
@ -243,9 +278,14 @@ Groups:
|
||||
reported with the same value regardless of the mpidr specified.
|
||||
|
||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
Errors:
|
||||
-EINVAL: vINTID is not multiple of 32 or
|
||||
info field is not VGIC_LEVEL_INFO_LINE_LEVEL
|
||||
|
||||
======= =============================================
|
||||
-EINVAL vINTID is not multiple of 32 or info field is
|
||||
not VGIC_LEVEL_INFO_LINE_LEVEL
|
||||
======= =============================================
|
@ -1,8 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================================================
|
||||
ARM Virtual Generic Interrupt Controller v2 (VGIC)
|
||||
==================================================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
||||
|
||||
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
||||
|
||||
Only one VGIC instance may be instantiated through either this API or the
|
||||
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
|
||||
@ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GIC distributor
|
||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
@ -27,19 +32,25 @@ Groups:
|
||||
Base address in the guest physical address space of the GIC virtual cpu
|
||||
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
This address needs to be 4K aligned and the region covers 4 KByte.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address
|
||||
-EEXIST: Address already configured
|
||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
||||
|
||||
======= =============================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address
|
||||
-EEXIST Address already configured
|
||||
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||
or hardware support is missing.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
======= =============================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
|
||||
All distributor regs are (rw, 32-bit)
|
||||
|
||||
@ -58,16 +69,22 @@ Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
|
||||
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
|
||||
to the interrupt group registers (GICD_IGROUPR) are ignored.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
-EINVAL: Invalid vcpu_index supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
-EINVAL Invalid vcpu_index supplied
|
||||
======= =====================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
|
||||
All CPU interface regs are (rw, 32-bit)
|
||||
|
||||
@ -101,27 +118,39 @@ Groups:
|
||||
value left by 3 places to obtain the actual priority mask level.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
-EINVAL: Invalid vcpu_index supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
-EINVAL Invalid vcpu_index supplied
|
||||
======= =====================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value set is out of the expected range
|
||||
-EBUSY: Value has already been set, or GIC has already been initialized
|
||||
with default values.
|
||||
|
||||
======= =============================================================
|
||||
-EINVAL Value set is out of the expected range
|
||||
-EBUSY Value has already been set, or GIC has already been initialized
|
||||
with default values.
|
||||
======= =============================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC or ITS, no additional parameter
|
||||
in kvm_device_attr.addr.
|
||||
|
||||
Errors:
|
||||
-ENXIO: VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV: no online VCPU
|
||||
-ENOMEM: memory shortage when allocating vgic internal data
|
||||
|
||||
======= =========================================================
|
||||
-ENXIO VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV no online VCPU
|
||||
-ENOMEM memory shortage when allocating vgic internal data
|
||||
======= =========================================================
|
Documentation/virt/kvm/devices/index.rst (new file, 19 lines)
@ -0,0 +1,19 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======
|
||||
Devices
|
||||
=======
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
arm-vgic-its
|
||||
arm-vgic
|
||||
arm-vgic-v3
|
||||
mpic
|
||||
s390_flic
|
||||
vcpu
|
||||
vfio
|
||||
vm
|
||||
xics
|
||||
xive
|
@ -1,9 +1,13 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================
|
||||
MPIC interrupt controller
|
||||
=========================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
||||
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
||||
|
||||
- KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
||||
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
||||
|
||||
Only one MPIC instance, of any type, may be instantiated. The created
|
||||
MPIC will act as the system interrupt controller, connecting to each
|
||||
@ -11,7 +15,8 @@ vcpu's interrupt inputs.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_MPIC_GRP_MISC
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
|
||||
Base address of the 256 KiB MPIC register space. Must be
|
||||
naturally aligned. A value of zero disables the mapping.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================================
|
||||
FLIC (floating interrupt controller)
|
||||
====================================
|
||||
|
||||
@ -31,8 +34,10 @@ Groups:
|
||||
Copies all floating interrupts into a buffer provided by userspace.
|
||||
When the buffer is too small it returns -ENOMEM, which is the indication
|
||||
for userspace to try again with a bigger buffer.
|
||||
|
||||
-ENOBUFS is returned when the allocation of a kernelspace buffer has
|
||||
failed.
|
||||
|
||||
-EFAULT is returned when copying data to userspace failed.
|
||||
All interrupts remain pending, i.e. are not deleted from the list of
|
||||
currently pending interrupts.
|
||||
@ -60,38 +65,41 @@ Groups:
|
||||
|
||||
KVM_DEV_FLIC_ADAPTER_REGISTER
|
||||
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
|
||||
describing the adapter to register:
|
||||
describing the adapter to register::
|
||||
|
||||
struct kvm_s390_io_adapter {
|
||||
__u32 id;
|
||||
__u8 isc;
|
||||
__u8 maskable;
|
||||
__u8 swap;
|
||||
__u8 flags;
|
||||
};
|
||||
struct kvm_s390_io_adapter {
|
||||
__u32 id;
|
||||
__u8 isc;
|
||||
__u8 maskable;
|
||||
__u8 swap;
|
||||
__u8 flags;
|
||||
};
|
||||
|
||||
id contains the unique id for the adapter, isc the I/O interruption subclass
|
||||
to use, maskable whether this adapter may be masked (interrupts turned off),
|
||||
swap whether the indicators need to be byte swapped, and flags contains
|
||||
further characteristics of the adapter.
|
||||
|
||||
Currently defined values for 'flags' are:
|
||||
|
||||
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
|
||||
(adapter-interrupt-suppression) facility. This flag only has an effect if
|
||||
the AIS capability is enabled.
|
||||
|
||||
Unknown flag values are ignored.
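As an illustrative aside (not part of this patch), registering such an adapter
from userspace is a KVM_SET_DEVICE_ATTR call on the FLIC device fd (assumed to
come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_FLIC), with the group set to
KVM_DEV_FLIC_ADAPTER_REGISTER and addr pointing at the struct shown above::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	static int flic_register_adapter(int flic_fd, uint32_t id, uint8_t isc)
	{
		struct kvm_s390_io_adapter adapter = {
			.id       = id,
			.isc      = isc,
			.maskable = 1,	/* interrupts may be turned off */
			.swap     = 0,	/* indicators are not byte swapped */
			.flags    = 0,
		};
		struct kvm_device_attr attr = {
			.group = KVM_DEV_FLIC_ADAPTER_REGISTER,
			.addr  = (uintptr_t)&adapter,
		};

		return ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);
	}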
|
||||
|
||||
|
||||
KVM_DEV_FLIC_ADAPTER_MODIFY
|
||||
Modifies attributes of an existing I/O adapter interrupt source. Takes
|
||||
a kvm_s390_io_adapter_req specifying the adapter and the operation:
|
||||
a kvm_s390_io_adapter_req specifying the adapter and the operation::
|
||||
|
||||
struct kvm_s390_io_adapter_req {
|
||||
__u32 id;
|
||||
__u8 type;
|
||||
__u8 mask;
|
||||
__u16 pad0;
|
||||
__u64 addr;
|
||||
};
|
||||
struct kvm_s390_io_adapter_req {
|
||||
__u32 id;
|
||||
__u8 type;
|
||||
__u8 mask;
|
||||
__u16 pad0;
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
id specifies the adapter and type the operation. The supported operations
|
||||
are:
|
||||
@ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req {
|
||||
perform a gmap translation for the guest address provided in addr,
|
||||
pin a userspace page for the translated address and add it to the
|
||||
list of mappings
|
||||
Note: A new mapping will be created unconditionally; therefore,
|
||||
the calling code should avoid making duplicate mappings.
|
||||
|
||||
.. note:: A new mapping will be created unconditionally; therefore,
|
||||
the calling code should avoid making duplicate mappings.
|
||||
|
||||
KVM_S390_IO_ADAPTER_UNMAP
|
||||
release a userspace page for the translated address specified in addr
|
||||
@ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req {
|
||||
|
||||
KVM_DEV_FLIC_AISM
|
||||
modify the adapter-interruption-suppression mode for a given isc if the
|
||||
AIS capability is enabled. Takes a kvm_s390_ais_req describing:
|
||||
AIS capability is enabled. Takes a kvm_s390_ais_req describing::
|
||||
|
||||
struct kvm_s390_ais_req {
|
||||
__u8 isc;
|
||||
__u16 mode;
|
||||
};
|
||||
struct kvm_s390_ais_req {
|
||||
__u8 isc;
|
||||
__u16 mode;
|
||||
};
|
||||
|
||||
isc contains the target I/O interruption subclass, mode the target
|
||||
adapter-interruption-suppression mode. The following modes are
|
||||
currently supported:
|
||||
|
||||
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
|
||||
is always allowed;
|
||||
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
|
||||
@ -139,12 +149,12 @@ struct kvm_s390_ais_req {
|
||||
|
||||
KVM_DEV_FLIC_AISM_ALL
|
||||
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
|
||||
a kvm_s390_ais_all describing:
|
||||
a kvm_s390_ais_all describing::
|
||||
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm; /* Single-Interruption-Mode mask */
|
||||
__u8 nimm; /* No-Interruption-Mode mask */
|
||||
};
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm; /* Single-Interruption-Mode mask */
|
||||
__u8 nimm; /* No-Interruption-Mode mask */
|
||||
};
|
||||
|
||||
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
|
||||
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
|
||||
@ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude
|
||||
that a FLIC operation is unavailable based on the error code resulting from a
|
||||
usage attempt.
|
||||
|
||||
Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
|
||||
schid is specified.
|
||||
.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
|
||||
zero schid is specified.
|
Documentation/virt/kvm/devices/vcpu.rst (new file, 114 lines)
@ -0,0 +1,114 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================
|
||||
Generic vcpu interface
|
||||
======================
|
||||
|
||||
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
||||
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
||||
|
||||
The groups and attributes per virtual cpu, if any, are architecture specific.
|
||||
|
||||
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
||||
==================================
|
||||
|
||||
:Architectures: ARM64
|
||||
|
||||
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
||||
---------------------------------------
|
||||
|
||||
:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
||||
pointer to an int
|
||||
|
||||
Returns:
|
||||
|
||||
======= ========================================================
|
||||
-EBUSY The PMU overflow interrupt is already set
|
||||
-ENXIO The overflow interrupt not set when attempting to get it
|
||||
-ENODEV PMUv3 not supported
|
||||
-EINVAL Invalid PMU overflow interrupt number supplied or
|
||||
trying to set the IRQ number without using an in-kernel
|
||||
irqchip.
|
||||
======= ========================================================
|
||||
|
||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||
|
||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||
---------------------------------------
|
||||
|
||||
:Parameters: no additional parameter in kvm_device_attr.addr
|
||||
|
||||
Returns:
|
||||
|
||||
======= ======================================================
|
||||
-ENODEV PMUv3 not supported or GIC not initialized
|
||||
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
|
||||
configured as required prior to calling this attribute
|
||||
-EBUSY PMUv3 already initialized
|
||||
======= ======================================================
|
||||
|
||||
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
||||
virtual GIC implementation, this must be done after initializing the in-kernel
|
||||
irqchip.
|
||||
|
||||
|
||||
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
||||
=================================
|
||||
|
||||
:Architectures: ARM, ARM64
|
||||
|
||||
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
||||
pointer to an int
|
||||
|
||||
Returns:
|
||||
|
||||
======= =================================
|
||||
-EINVAL Invalid timer interrupt number
|
||||
-EBUSY One or more VCPUs has already run
|
||||
======= =================================
|
||||
|
||||
A value describing the architected timer interrupt number when connected to an
|
||||
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
||||
attribute overrides the default values (see below).
|
||||
|
||||
============================= ==========================================
|
||||
KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
|
||||
KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
|
||||
============================= ==========================================
|
||||
|
||||
Setting the same PPI for different timers will prevent the VCPUs from running.
|
||||
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
||||
time to use the number provided for a given timer, overwriting any previously
|
||||
configured values on other VCPUs. Userspace should configure the interrupt
|
||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||
VCPUs.
|
||||
|
||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||
==================================
|
||||
|
||||
:Architectures: ARM64
|
||||
|
||||
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
||||
--------------------------------------
|
||||
|
||||
:Parameters: 64-bit base address
|
||||
|
||||
Returns:
|
||||
|
||||
======= ======================================
|
||||
-ENXIO Stolen time not implemented
|
||||
-EEXIST Base address already set for this VCPU
|
||||
-EINVAL Base address not 64 byte aligned
|
||||
======= ======================================
|
||||
|
||||
Specifies the base address of the stolen time structure for this VCPU. The
|
||||
base address must be 64 byte aligned and exist within a valid guest memory
|
||||
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
||||
including the layout of the stolen time structure.
|
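As an illustrative aside (not part of this patch), setting the stolen-time base
is a KVM_SET_DEVICE_ATTR ioctl issued on the vcpu fd itself; 'ipa' below is
assumed to have been chosen inside a valid, already-registered guest memory
region::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	static int set_pvtime_ipa(int vcpu_fd, uint64_t ipa)
	{
		/* 'ipa' must be 64 byte aligned, as required above. */
		struct kvm_device_attr attr = {
			.group = KVM_ARM_VCPU_PVTIME_CTRL,
			.attr  = KVM_ARM_VCPU_PVTIME_IPA,
			.addr  = (uintptr_t)&ipa,
		};

		return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}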
@ -1,76 +0,0 @@
|
||||
Generic vcpu interface
|
||||
====================================
|
||||
|
||||
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
||||
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
||||
|
||||
The groups and attributes per virtual cpu, if any, are architecture specific.
|
||||
|
||||
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
||||
Architectures: ARM64
|
||||
|
||||
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
||||
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
||||
pointer to an int
|
||||
Returns: -EBUSY: The PMU overflow interrupt is already set
|
||||
-ENXIO: The overflow interrupt not set when attempting to get it
|
||||
-ENODEV: PMUv3 not supported
|
||||
-EINVAL: Invalid PMU overflow interrupt number supplied or
|
||||
trying to set the IRQ number without using an in-kernel
|
||||
irqchip.
|
||||
|
||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||
|
||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||
Parameters: no additional parameter in kvm_device_attr.addr
|
||||
Returns: -ENODEV: PMUv3 not supported or GIC not initialized
|
||||
-ENXIO: PMUv3 not properly configured or in-kernel irqchip not
|
||||
configured as required prior to calling this attribute
|
||||
-EBUSY: PMUv3 already initialized
|
||||
|
||||
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
||||
virtual GIC implementation, this must be done after initializing the in-kernel
|
||||
irqchip.
|
||||
|
||||
|
||||
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
||||
Architectures: ARM,ARM64
|
||||
|
||||
2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
|
||||
2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
||||
Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
||||
pointer to an int
|
||||
Returns: -EINVAL: Invalid timer interrupt number
|
||||
-EBUSY: One or more VCPUs has already run
|
||||
|
||||
A value describing the architected timer interrupt number when connected to an
|
||||
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
||||
attribute overrides the default values (see below).
|
||||
|
||||
KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
|
||||
KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
|
||||
|
||||
Setting the same PPI for different timers will prevent the VCPUs from running.
|
||||
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
||||
time to use the number provided for a given timer, overwriting any previously
|
||||
configured values on other VCPUs. Userspace should configure the interrupt
|
||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||
VCPUs.
|
||||
|
||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||
Architectures: ARM64
|
||||
|
||||
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
||||
Parameters: 64-bit base address
|
||||
Returns: -ENXIO: Stolen time not implemented
|
||||
-EEXIST: Base address already set for this VCPU
|
||||
-EINVAL: Base address not 64 byte aligned
|
||||
|
||||
Specifies the base address of the stolen time structure for this VCPU. The
|
||||
base address must be 64 byte aligned and exist within a valid guest memory
|
||||
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
||||
including the layout of the stolen time structure.
|
@ -1,8 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================
|
||||
VFIO virtual device
|
||||
===================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_VFIO
|
||||
|
||||
- KVM_DEV_TYPE_VFIO
|
||||
|
||||
Only one VFIO instance may be created per VM. The created device
|
||||
tracks VFIO groups in use by the VM and features of those groups
|
||||
@ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes:
|
||||
for the VFIO group.
|
||||
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
|
||||
allocated by sPAPR KVM.
|
||||
kvm_device_attr.addr points to a struct:
|
||||
kvm_device_attr.addr points to a struct::
|
||||
|
||||
struct kvm_vfio_spapr_tce {
|
||||
__s32 groupfd;
|
||||
__s32 tablefd;
|
||||
};
|
||||
struct kvm_vfio_spapr_tce {
|
||||
__s32 groupfd;
|
||||
__s32 tablefd;
|
||||
};
|
||||
|
||||
where
|
||||
@groupfd is a file descriptor for a VFIO group;
|
||||
@tablefd is a file descriptor for a TCE table allocated via
|
||||
KVM_CREATE_SPAPR_TCE.
|
||||
where:
|
||||
|
||||
- @groupfd is a file descriptor for a VFIO group;
|
||||
- @tablefd is a file descriptor for a TCE table allocated via
|
||||
KVM_CREATE_SPAPR_TCE.
|
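As an illustrative aside (not part of this patch), attaching a pre-allocated
TCE table to a VFIO group is a KVM_SET_DEVICE_ATTR call whose addr points at
the struct shown above; 'kvm_vfio_fd' is assumed to come from KVM_CREATE_DEVICE
with KVM_DEV_TYPE_VFIO::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	static int kvm_vfio_set_spapr_tce(int kvm_vfio_fd, int32_t groupfd,
					  int32_t tablefd)
	{
		struct kvm_vfio_spapr_tce param = {
			.groupfd = groupfd,
			.tablefd = tablefd,
		};
		struct kvm_device_attr attr = {
			.group = KVM_DEV_VFIO_GROUP,
			.attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
			.addr  = (uintptr_t)&param,
		};

		return ioctl(kvm_vfio_fd, KVM_SET_DEVICE_ATTR, &attr);
	}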
@ -1,5 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Generic vm interface
|
||||
====================================
|
||||
====================
|
||||
|
||||
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
|
||||
@ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture
|
||||
specific.
|
||||
|
||||
1. GROUP: KVM_S390_VM_MEM_CTRL
|
||||
Architectures: s390
|
||||
==============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
|
||||
Parameters: none
|
||||
Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
||||
-------------------------------------------
|
||||
|
||||
:Parameters: none
|
||||
:Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
||||
|
||||
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
|
||||
|
||||
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
|
||||
Parameters: none
|
||||
Returns: -EINVAL if CMMA was not enabled
|
||||
0 otherwise
|
||||
----------------------------------------
|
||||
|
||||
:Parameters: none
|
||||
:Returns: -EINVAL if CMMA was not enabled;
|
||||
0 otherwise
|
||||
|
||||
Clear the CMMA status for all guest pages, so any pages the guest marked
|
||||
as unused are again used any may not be reclaimed by the host.
|
||||
|
||||
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
|
||||
Parameters: in attr->addr the address for the new limit of guest memory
|
||||
Returns: -EFAULT if the given address is not accessible
|
||||
-EINVAL if the virtual machine is of type UCONTROL
|
||||
-E2BIG if the given guest memory is to big for that machine
|
||||
-EBUSY if a vcpu is already defined
|
||||
-ENOMEM if not enough memory is available for a new shadow guest mapping
|
||||
0 otherwise
|
||||
-----------------------------------------
|
||||
|
||||
:Parameters: in attr->addr the address for the new limit of guest memory
|
||||
:Returns: -EFAULT if the given address is not accessible;
|
||||
-EINVAL if the virtual machine is of type UCONTROL;
|
||||
-E2BIG if the given guest memory is too big for that machine;
|
||||
-EBUSY if a vcpu is already defined;
|
||||
-ENOMEM if not enough memory is available for a new shadow guest mapping;
|
||||
0 otherwise.
|
||||
|
||||
Allows userspace to query the actual limit and set a new limit for
|
||||
the maximum guest memory size. The limit will be rounded up to
|
||||
@ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set
|
||||
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
|
||||
|
||||
2. GROUP: KVM_S390_VM_CPU_MODEL
|
||||
Architectures: s390
|
||||
===============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
|
||||
---------------------------------------------
|
||||
|
||||
Allows user space to retrieve machine and kvm specific cpu related information:
|
||||
Allows user space to retrieve machine and kvm specific cpu related information::
|
||||
|
||||
struct kvm_s390_vm_cpu_machine {
|
||||
struct kvm_s390_vm_cpu_machine {
|
||||
__u64 cpuid; # CPUID of host
|
||||
__u32 ibc; # IBC level range offered by host
|
||||
__u8 pad[4];
|
||||
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
|
||||
__u64 fac_list[256]; # set of cpu facilities offered by host
|
||||
}
|
||||
}
|
||||
|
||||
Parameters: address of buffer to store the machine related cpu data
|
||||
of type struct kvm_s390_vm_cpu_machine*
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
-ENOMEM if not enough memory is available to process the ioctl
|
||||
0 in case of success
|
||||
:Parameters: address of buffer to store the machine related cpu data
|
||||
of type struct kvm_s390_vm_cpu_machine*
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-ENOMEM if not enough memory is available to process the ioctl;
|
||||
0 in case of success.
|
||||
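As an illustrative aside (not part of this patch), the attribute is read with
KVM_GET_DEVICE_ATTR on the VM fd; the struct and constants below come from the
s390 uapi headers, so the sketch only builds on an s390 host::

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	static int get_cpu_machine(int vm_fd, struct kvm_s390_vm_cpu_machine *m)
	{
		struct kvm_device_attr attr = {
			.group = KVM_S390_VM_CPU_MODEL,
			.attr  = KVM_S390_VM_CPU_MACHINE,
			.addr  = (uintptr_t)m,
		};

		return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
	}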
|
||||
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
|
||||
===============================================
|
||||
|
||||
Allows user space to retrieve or request to change cpu related information for a vcpu:
|
||||
Allows user space to retrieve or request to change cpu related information for a vcpu::
|
||||
|
||||
struct kvm_s390_vm_cpu_processor {
|
||||
struct kvm_s390_vm_cpu_processor {
|
||||
__u64 cpuid; # CPUID currently (to be) used by this vcpu
|
||||
__u16 ibc; # IBC level currently (to be) used by this vcpu
|
||||
__u8 pad[6];
|
||||
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
|
||||
# by this vcpu
|
||||
}
|
||||
# by this vcpu
|
||||
}
|
||||
|
||||
KVM does not enforce or limit the cpu model data in any form. Take the information
|
||||
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
|
||||
setups. Instruction interceptions triggered by additionally set facility bits that
|
||||
are not handled by KVM need to be implemented in the VM driver code.
|
||||
|
||||
Parameters: address of buffer to store/set the processor related cpu
|
||||
data of type struct kvm_s390_vm_cpu_processor*.
|
||||
Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
|
||||
-EFAULT if the given address is not accessible from kernel space
|
||||
-ENOMEM if not enough memory is available to process the ioctl
|
||||
0 in case of success
|
||||
:Parameters: address of buffer to store/set the processor related cpu
|
||||
data of type struct kvm_s390_vm_cpu_processor*.
|
||||
:Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
|
||||
-EFAULT if the given address is not accessible from kernel space;
|
||||
-ENOMEM if not enough memory is available to process the ioctl;
|
||||
0 in case of success.
|
||||
|
||||
.. _KVM_S390_VM_CPU_MACHINE_FEAT:
|
||||
|
||||
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
|
||||
--------------------------------------------------
|
||||
|
||||
Allows user space to retrieve available cpu features. A feature is available if
|
||||
provided by the hardware and supported by kvm. In theory, cpu features could
|
||||
even be completely emulated by kvm.
|
||||
|
||||
struct kvm_s390_vm_cpu_feat {
|
||||
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
||||
};
|
||||
::
|
||||
|
||||
Parameters: address of a buffer to load the feature list from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
0 in case of success.
|
||||
struct kvm_s390_vm_cpu_feat {
|
||||
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
||||
};
|
||||
|
||||
:Parameters: address of a buffer to load the feature list from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
||||
|
||||
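Since the bitmap above uses MSB 0 bit numbering, a small helper makes it easier to check whether a given feature number is offered. This is only a sketch derived from the layout documented here; the feature numbers themselves come from the KVM_S390_VM_CPU_FEAT_* definitions in the uapi headers::

  #include <stdbool.h>
  #include <stdint.h>

  /* feat[16] of 64-bit words, MSB 0 numbering: bit 0 is the most
   * significant bit of feat[0], bit 64 that of feat[1], and so on. */
  static bool cpu_feat_test(const uint64_t feat[16], unsigned int nr)
  {
          return feat[nr / 64] & (1ULL << (63 - (nr % 64)));
  }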
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
|
||||
----------------------------------------------------
|
||||
|
||||
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
|
||||
VM. Features that are not available cannot be enabled.
|
||||
|
||||
See 2.3. for a description of the parameter struct.
|
||||
See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
|
||||
a description of the parameter struct.
|
||||
|
||||
Parameters: address of a buffer to store/load the feature list from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
-EINVAL if a cpu feature that is not available is to be enabled.
|
||||
-EBUSY if at least one VCPU has already been defined.
|
||||
:Parameters: address of a buffer to store/load the feature list from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if a cpu feature that is not available is to be enabled;
|
||||
-EBUSY if at least one VCPU has already been defined;
|
||||
0 in case of success.
|
||||
|
||||
.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
||||
|
||||
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
|
||||
-----------------------------------------------------
|
||||
|
||||
Allows user space to retrieve available cpu subfunctions without any filtering
|
||||
done by a set IBC. These subfunctions are indicated to the guest VCPU via
|
||||
@ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction
|
||||
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
|
||||
contained in the returned struct in MSB 0 bit numbering.
|
||||
|
||||
struct kvm_s390_vm_cpu_subfunc {
|
||||
::
|
||||
|
||||
struct kvm_s390_vm_cpu_subfunc {
|
||||
u8 plo[32]; # always valid (ESA/390 feature)
|
||||
u8 ptff[16]; # valid with TOD-clock steering
|
||||
u8 kmac[16]; # valid with Message-Security-Assist
|
||||
@ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
|
||||
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
|
||||
u8 reserved[1792]; # reserved for future instructions
|
||||
};
|
||||
};
|
||||
|
||||
Parameters: address of a buffer to load the subfunction blocks from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
:Parameters: address of a buffer to load the subfunction blocks from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
||||
|
||||
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to retrieve or change cpu subfunctions to be indicated for
|
||||
all VCPUs of a VM. This attribute will only be available if kernel and
|
||||
@ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used
|
||||
to determine available subfunctions in this case; this will guarantee backward
|
||||
compatibility.
|
||||
|
||||
See 2.5. for a description of the parameter struct.
|
||||
See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
|
||||
description of the parameter struct.
|
||||
|
||||
Parameters: address of a buffer to store/load the subfunction blocks from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
-EINVAL when reading, if there was no write yet.
|
||||
-EBUSY if at least one VCPU has already been defined.
|
||||
:Parameters: address of a buffer to store/load the subfunction blocks from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL when reading, if there was no write yet;
|
||||
-EBUSY if at least one VCPU has already been defined;
|
||||
0 in case of success.
|
||||
|
||||
3. GROUP: KVM_S390_VM_TOD
|
||||
Architectures: s390
|
||||
=========================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
|
||||
------------------------------------
|
||||
|
||||
Allows user space to set/get the TOD clock extension (u8) (superseded by
|
||||
KVM_S390_VM_TOD_EXT).
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u8) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u8) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||
|
||||
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
|
||||
-----------------------------------
|
||||
|
||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||
the POP (u64).
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u64) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u64) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
|
||||
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
|
||||
-----------------------------------
|
||||
|
||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
|
||||
also allows user space to get/set it. If the guest CPU model does not support
|
||||
it, it is stored as 0 and not allowed to be set to a value != 0.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data
|
||||
(kvm_s390_vm_tod_clock) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data
|
||||
(kvm_s390_vm_tod_clock) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||
|
||||
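A userspace sketch of reading the extended TOD clock could look as follows. It assumes the struct kvm_s390_vm_tod_clock layout from the s390 uapi headers (an epoch index byte plus the 64-bit TOD value) and the usual KVM_GET_DEVICE_ATTR ioctl on the VM fd::

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int get_tod_ext(int vm_fd, struct kvm_s390_vm_tod_clock *tod)
  {
          struct kvm_device_attr attr = {
                  .group = KVM_S390_VM_TOD,
                  .attr  = KVM_S390_VM_TOD_EXT,
                  .addr  = (uintptr_t)tod,
          };

          return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
  }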
4. GROUP: KVM_S390_VM_CRYPTO
|
||||
Architectures: s390
|
||||
============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
|
||||
------------------------------------------------------
|
||||
|
||||
Allows user space to enable aes key wrapping, including generating a new
|
||||
wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
|
||||
------------------------------------------------------
|
||||
|
||||
Allows user space to enable dea key wrapping, including generating a new
|
||||
wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to disable aes key wrapping, clearing the wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to disable dea key wrapping, clearing the wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
5. GROUP: KVM_S390_VM_MIGRATION
|
||||
Architectures: s390
|
||||
===============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
|
||||
------------------------------------------------
|
||||
|
||||
Allows userspace to stop migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is not active will have no
|
||||
effect.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
|
||||
-------------------------------------------------
|
||||
|
||||
Allows userspace to start migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is already active will have
|
||||
no effect.
|
||||
|
||||
Parameters: none
|
||||
Returns: -ENOMEM if there is not enough free memory to start migration mode
|
||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined)
|
||||
:Parameters: none
|
||||
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
|
||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
|
||||
0 in case of success.
|
||||
|
||||
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
|
||||
--------------------------------------------------
|
||||
|
||||
Allows userspace to query the status of migration mode.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u64) to;
|
||||
the data itself is either 0 if migration mode is disabled or 1
|
||||
if it is enabled
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u64) to;
|
||||
the data itself is either 0 if migration mode is disabled or 1
|
||||
if it is enabled
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
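To make the read side concrete, the sketch below queries the flag. It assumes the KVM_S390_VM_MIGRATION group and KVM_S390_VM_MIGRATION_STATUS attribute names from the uapi headers and the usual device-attribute ioctl on the VM fd::

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Returns 1 if migration mode is enabled, 0 if disabled, -1 on error. */
  static int migration_mode_active(int vm_fd)
  {
          uint64_t state;
          struct kvm_device_attr attr = {
                  .group = KVM_S390_VM_MIGRATION,
                  .attr  = KVM_S390_VM_MIGRATION_STATUS,
                  .addr  = (uintptr_t)&state,
          };

          if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
                  return -1;
          return state ? 1 : 0;
  }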
@ -1,20 +1,31 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================
|
||||
XICS interrupt controller
|
||||
=========================
|
||||
|
||||
Device type supported: KVM_DEV_TYPE_XICS
|
||||
|
||||
Groups:
|
||||
1. KVM_DEV_XICS_GRP_SOURCES
|
||||
Attributes: One per interrupt source, indexed by the source number.
|
||||
Attributes:
|
||||
|
||||
One per interrupt source, indexed by the source number.
|
||||
2. KVM_DEV_XICS_GRP_CTRL
|
||||
Attributes:
|
||||
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
||||
Attributes:
|
||||
|
||||
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
||||
|
||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||
interrupt server numbers (ie, highest possible vcpu id plus one).
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EBUSY: A vcpu is already connected to the device.
|
||||
|
||||
======= ==========================================
|
||||
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EBUSY A vcpu is already connected to the device.
|
||||
======= ==========================================
|
||||
|
||||
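As an illustration, userspace would typically set this value right after creating the device. The sketch assumes a XICS device fd obtained via KVM_CREATE_DEVICE and the KVM_DEV_XICS_GRP_CTRL/KVM_DEV_XICS_NR_SERVERS names from the uapi headers::

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int xics_set_nr_servers(int xics_fd, uint32_t nr_servers)
  {
          struct kvm_device_attr attr = {
                  .group = KVM_DEV_XICS_GRP_CTRL,
                  .attr  = KVM_DEV_XICS_NR_SERVERS,
                  .addr  = (uintptr_t)&nr_servers,
          };

          return ioctl(xics_fd, KVM_SET_DEVICE_ATTR, &attr);
  }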
This device emulates the XICS (eXternal Interrupt Controller
|
||||
Specification) defined in PAPR. The XICS has a set of interrupt
|
||||
@ -53,24 +64,29 @@ the interrupt source number. The 64 bit state word has the following
|
||||
bitfields, starting from the least-significant end of the word:
|
||||
|
||||
* Destination (server number), 32 bits
|
||||
|
||||
This specifies where the interrupt should be sent, and is the
|
||||
interrupt server number specified for the destination vcpu.
|
||||
|
||||
* Priority, 8 bits
|
||||
|
||||
This is the priority specified for this interrupt source, where 0 is
|
||||
the highest priority and 255 is the lowest. An interrupt with a
|
||||
priority of 255 will never be delivered.
|
||||
|
||||
* Level sensitive flag, 1 bit
|
||||
|
||||
This bit is 1 for a level-sensitive interrupt source, or 0 for
|
||||
edge-sensitive (or MSI).
|
||||
|
||||
* Masked flag, 1 bit
|
||||
|
||||
This bit is set to 1 if the interrupt is masked (cannot be delivered
|
||||
regardless of its priority), for example by the ibm,int-off RTAS
|
||||
call, or 0 if it is not masked.
|
||||
|
||||
* Pending flag, 1 bit
|
||||
|
||||
This bit is 1 if the source has a pending interrupt, otherwise 0.
|
||||
|
||||
Only one XICS instance may be created per VM.
|
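Putting the bitfields above together, a saved source state word can be composed as in the sketch below. The shift values are taken directly from the layout documented here (the kernel also exports KVM_XICS_* masks for this); the helper name is made up for illustration::

  #include <stdint.h>

  /* server: bits 0-31, priority: bits 32-39, then level/masked/pending. */
  static uint64_t xics_source_state(uint32_t server, uint8_t prio,
                                    int level, int masked, int pending)
  {
          uint64_t s = server;

          s |= (uint64_t)prio      << 32;
          s |= (uint64_t)!!level   << 40;
          s |= (uint64_t)!!masked  << 41;
          s |= (uint64_t)!!pending << 42;
          return s;
  }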
@ -1,8 +1,11 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================================
|
||||
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
|
||||
==========================================================
|
||||
===========================================================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||
- KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||
|
||||
This device acts as a VM interrupt controller. It provides the KVM
|
||||
interface to configure the interrupt sources of a VM in the underlying
|
||||
@ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
|
||||
* Groups:
|
||||
|
||||
1. KVM_DEV_XIVE_GRP_CTRL
|
||||
Provides global controls on the device
|
||||
1. KVM_DEV_XIVE_GRP_CTRL
|
||||
Provides global controls on the device
|
||||
|
||||
Attributes:
|
||||
1.1 KVM_DEV_XIVE_RESET (write only)
|
||||
Resets the interrupt controller configuration for sources and event
|
||||
queues. To be used by kexec and kdump.
|
||||
|
||||
Errors: none
|
||||
|
||||
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
|
||||
Sync all the sources and queues and mark the EQ pages dirty. This
|
||||
is to make sure that a consistent memory state is captured when
|
||||
migrating the VM.
|
||||
|
||||
Errors: none
|
||||
|
||||
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
|
||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||
interrupt server numbers (ie, highest possible vcpu id plus one).
|
||||
Errors:
|
||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EBUSY: A vCPU is already connected to the device.
|
||||
|
||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||
Initializes a new source in the XIVE device and mask it.
|
||||
Errors:
|
||||
|
||||
======= ==========================================
|
||||
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EBUSY A vCPU is already connected to the device.
|
||||
======= ==========================================
|
||||
|
||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||
Initializes a new source in the XIVE device and masks it.
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 2 | 1 | 0
|
||||
values: | unused | level | type
|
||||
|
||||
The kvm_device_attr.addr points to a __u64 value::
|
||||
|
||||
bits: | 63 .... 2 | 1 | 0
|
||||
values: | unused | level | type
|
||||
|
||||
- type: 0:MSI 1:LSI
|
||||
- level: assertion level in case of an LSI.
|
||||
Errors:
|
||||
-E2BIG: Interrupt source number is out of range
|
||||
-ENOMEM: Could not create a new source block
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: Could not allocate underlying HW interrupt
|
||||
|
||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||
Configures source targeting
|
||||
Errors:
|
||||
|
||||
======= ==========================================
|
||||
-E2BIG Interrupt source number is out of range
|
||||
-ENOMEM Could not create a new source block
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENXIO Could not allocate underlying HW interrupt
|
||||
======= ==========================================
|
||||
|
||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||
Configures source targeting
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||
values: | eisn | mask | server | priority
|
||||
|
||||
The kvm_device_attr.addr points to a __u64 value::
|
||||
|
||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||
values: | eisn | mask | server | priority
|
||||
|
||||
- priority: 0-7 interrupt priority level
|
||||
- server: CPU number chosen to handle the interrupt
|
||||
- mask: mask flag (unused)
|
||||
- eisn: Effective Interrupt Source Number
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid CPU number.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: CPU event queues not configured or configuration of the
|
||||
underlying HW interrupt failed
|
||||
-EBUSY: No CPU available to serve interrupt
|
||||
|
||||
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||
Configures an event queue of a CPU
|
||||
Errors:
|
||||
|
||||
======= =======================================================
|
||||
-ENOENT Unknown source number
|
||||
-EINVAL Not initialized source number
|
||||
-EINVAL Invalid priority
|
||||
-EINVAL Invalid CPU number.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENXIO CPU event queues not configured or configuration of the
|
||||
underlying HW interrupt failed
|
||||
-EBUSY No CPU available to serve interrupt
|
||||
======= =======================================================
|
||||
|
||||
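For reference, the 64-bit configuration value described above can be assembled and written as in the following sketch. The bit positions come straight from the layout shown here, KVM_DEV_XIVE_GRP_SOURCE_CONFIG is the group named in this document, and the helper itself is illustrative only::

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int xive_source_config(int xive_fd, uint64_t irq, uint64_t eisn,
                                uint32_t server, uint8_t prio, int masked)
  {
          /* priority: bits 2..0, server: 31..3, mask: 32, eisn: 63..33 */
          uint64_t val = (prio & 0x7) |
                         ((uint64_t)(server & 0x1fffffff) << 3) |
                         ((uint64_t)!!masked << 32) |
                         (eisn << 33);
          struct kvm_device_attr attr = {
                  .group = KVM_DEV_XIVE_GRP_SOURCE_CONFIG,
                  .attr  = irq,             /* interrupt source number */
                  .addr  = (uintptr_t)&val,
          };

          return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
  }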
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||
Configures an event queue of a CPU
|
||||
|
||||
Attributes:
|
||||
EQ descriptor identifier (64-bit)
|
||||
The EQ descriptor identifier is a tuple (server, priority) :
|
||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||
values: | unused | server | priority
|
||||
The kvm_device_attr.addr points to :
|
||||
|
||||
The EQ descriptor identifier is a tuple (server, priority)::
|
||||
|
||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||
values: | unused | server | priority
|
||||
|
||||
The kvm_device_attr.addr points to::
|
||||
|
||||
struct kvm_ppc_xive_eq {
|
||||
__u32 flags;
|
||||
__u32 qshift;
|
||||
@ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
__u32 qindex;
|
||||
__u8 pad[40];
|
||||
};
|
||||
|
||||
- flags: queue flags
|
||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||
forces notification without using the coalescing mechanism
|
||||
provided by the XIVE END ESBs.
|
||||
- qshift: queue size (power of 2)
|
||||
@ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
- qtoggle: current queue toggle bit
|
||||
- qindex: current queue index
|
||||
- pad: reserved for future use
|
||||
Errors:
|
||||
-ENOENT: Invalid CPU number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid flags
|
||||
-EINVAL: Invalid queue size
|
||||
-EINVAL: Invalid queue address
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EIO: Configuration of the underlying HW failed
|
||||
|
||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||
Synchronize the source to flush event notifications
|
||||
Errors:
|
||||
|
||||
======= =========================================
|
||||
-ENOENT Invalid CPU number
|
||||
-EINVAL Invalid priority
|
||||
-EINVAL Invalid flags
|
||||
-EINVAL Invalid queue size
|
||||
-EINVAL Invalid queue address
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EIO Configuration of the underlying HW failed
|
||||
======= =========================================
|
||||
|
||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||
Synchronize the source to flush event notifications
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
|
||||
======= =============================
|
||||
-ENOENT Unknown source number
|
||||
-EINVAL Not initialized source number
|
||||
======= =============================
|
||||
|
||||
* VCPU state
|
||||
|
||||
@ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
as it synthesizes the priorities of the pending interrupts. We
|
||||
capture a bit more to report debug information.
|
||||
|
||||
KVM_REG_PPC_VP_STATE (2 * 64bits)
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | TIMA word0 | TIMA word1 |
|
||||
bits: | 127 .......... 64 |
|
||||
values: | unused |
|
||||
KVM_REG_PPC_VP_STATE (2 * 64bits)::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | TIMA word0 | TIMA word1 |
|
||||
bits: | 127 .......... 64 |
|
||||
values: | unused |
|
||||
|
||||
* Migration:
|
||||
|
||||
@ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
3. Capture the state of the source targeting, the EQs configuration
|
||||
and the state of thread interrupt context registers.
|
||||
|
||||
Restore is similar :
|
||||
Restore is similar:
|
||||
|
||||
1. Restore the EQ configuration, as targeting depends on it.
|
||||
2. Restore targeting
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================
|
||||
The KVM halt polling system
|
||||
===========================
|
||||
|
||||
@ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups
|
||||
which come at an approximately constant rate, otherwise there will be constant
|
||||
adjustment of the polling interval.
|
||||
|
||||
[0] total block time: the time between when the halt polling function is
|
||||
[0] total block time:
|
||||
the time between when the halt polling function is
|
||||
invoked and a wakeup source received (irrespective of
|
||||
whether the scheduler is invoked within that function).
|
||||
|
||||
@ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
|
||||
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
|
||||
powerpc kvm-hv case.
|
||||
|
||||
Module Parameter | Description | Default Value
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT
|
||||
| interval which defines |
|
||||
| the ceiling value of the |
|
||||
| polling interval for | (per arch value)
|
||||
| each vcpu. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_grow | The value by which the | 2
|
||||
| halt polling interval is |
|
||||
| multiplied in the |
|
||||
| grow_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_grow_start | The initial value to grow | 10000
|
||||
| to from zero in the |
|
||||
| grow_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_shrink | The value by which the | 0
|
||||
| halt polling interval is |
|
||||
| divided in the |
|
||||
| shrink_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|Module Parameter | Description | Default Value |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
|
||||
| | interval which defines | |
|
||||
| | the ceiling value of the | |
|
||||
| | polling interval for | (per arch value) |
|
||||
| | each vcpu. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_grow | The value by which the | 2 |
|
||||
| | halt polling interval is | |
|
||||
| | multiplied in the | |
|
||||
| | grow_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_grow_start| The initial value to grow | 10000 |
|
||||
| | to from zero in the | |
|
||||
| | grow_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_shrink | The value by which the | 0 |
|
||||
| | halt polling interval is | |
|
||||
| | divided in the | |
|
||||
| | shrink_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|
||||
These module parameters can be set from the debugfs files in:
|
||||
|
||||
@ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to
|
||||
Further Notes
|
||||
=============
|
||||
|
||||
- Care should be taken when setting the halt_poll_ns module parameter as a
|
||||
large value has the potential to drive the cpu usage to 100% on a machine which
|
||||
would be almost entirely idle otherwise. This is because even if a guest has
|
||||
wakeups during which very little work is done and which are quite far apart, if
|
||||
the period is shorter than the global max polling interval (halt_poll_ns) then
|
||||
the host will always poll for the entire block time and thus cpu utilisation
|
||||
will go to 100%.
|
||||
- Care should be taken when setting the halt_poll_ns module parameter as a large value
|
||||
has the potential to drive the cpu usage to 100% on a machine which would be almost
|
||||
entirely idle otherwise. This is because even if a guest has wakeups during which very
|
||||
little work is done and which are quite far apart, if the period is shorter than the
|
||||
global max polling interval (halt_poll_ns) then the host will always poll for the
|
||||
entire block time and thus cpu utilisation will go to 100%.
|
||||
|
||||
- Halt polling essentially presents a trade off between power usage and latency
|
||||
and the module parameters should be used to tune the affinity for this. Idle
|
||||
cpu time is essentially converted to host kernel time with the aim of decreasing
|
||||
latency when entering the guest.
|
||||
- Halt polling essentially presents a trade off between power usage and latency and
|
||||
the module parameters should be used to tune the affinity for this. Idle cpu time is
|
||||
essentially converted to host kernel time with the aim of decreasing latency when
|
||||
entering the guest.
|
||||
|
||||
- Halt polling will only be conducted by the host when no other tasks are
|
||||
runnable on that cpu, otherwise the polling will cease immediately and
|
||||
schedule will be invoked to allow that other task to run. Thus this doesn't
|
||||
allow a guest to denial of service the cpu.
|
||||
- Halt polling will only be conducted by the host when no other tasks are runnable on
|
||||
that cpu, otherwise the polling will cease immediately and schedule will be invoked to
|
||||
allow that other task to run. Thus this doesn't allow a guest to mount a denial of
|
||||
service attack against the cpu.
|
@ -1,5 +1,9 @@
|
||||
Linux KVM Hypercall:
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================
|
||||
Linux KVM Hypercall
|
||||
===================
|
||||
|
||||
X86:
|
||||
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
|
||||
instruction. The hypervisor can replace it with instructions that are
|
||||
@ -20,7 +24,7 @@ S390:
|
||||
For further information on the S390 diagnose call as supported by KVM,
|
||||
refer to Documentation/virt/kvm/s390-diag.txt.
|
||||
|
||||
PowerPC:
|
||||
PowerPC:
|
||||
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
|
||||
Return value is placed in R3.
|
||||
|
||||
@ -34,7 +38,8 @@ MIPS:
|
||||
the return value is placed in $2 (v0).
|
||||
|
||||
KVM Hypercalls Documentation
|
||||
===========================
|
||||
============================
|
||||
|
||||
The template for each hypercall is:
|
||||
1. Hypercall name.
|
||||
2. Architecture(s)
|
||||
@ -43,56 +48,64 @@ The template for each hypercall is:
|
||||
|
||||
1. KVM_HC_VAPIC_POLL_IRQ
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Trigger guest exit so that the host can check for pending
|
||||
interrupts on reentry.
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Trigger guest exit so that the host can check for pending
|
||||
interrupts on reentry.
|
||||
|
||||
2. KVM_HC_MMU_OP
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: deprecated.
|
||||
Purpose: Support MMU operations such as writing to PTE,
|
||||
flushing TLB, release PT.
|
||||
----------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: deprecated.
|
||||
:Purpose: Support MMU operations such as writing to PTE,
|
||||
flushing TLB, release PT.
|
||||
|
||||
3. KVM_HC_FEATURES
|
||||
------------------------
|
||||
Architecture: PPC
|
||||
Status: active
|
||||
Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
||||
used to enumerate which hypercalls are available. On PPC, either device tree
|
||||
based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
|
||||
mechanism (which is this hypercall) can be used.
|
||||
------------------
|
||||
|
||||
:Architecture: PPC
|
||||
:Status: active
|
||||
:Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
||||
is used to enumerate which hypercalls are available. On PPC, either
|
||||
device tree based lookup (which is also what EPAPR dictates)
|
||||
OR KVM specific enumeration mechanism (which is this hypercall)
|
||||
can be used.
|
||||
|
||||
4. KVM_HC_PPC_MAP_MAGIC_PAGE
|
||||
------------------------
|
||||
Architecture: PPC
|
||||
Status: active
|
||||
Purpose: To enable communication between the hypervisor and guest there is a
|
||||
shared page that contains parts of supervisor visible register state.
|
||||
The guest can map this shared page to access its supervisor register through
|
||||
memory using this hypercall.
|
||||
----------------------------
|
||||
|
||||
:Architecture: PPC
|
||||
:Status: active
|
||||
:Purpose: To enable communication between the hypervisor and guest, there is a
|
||||
shared page that contains parts of supervisor visible register state.
|
||||
The guest can map this shared page to access its supervisor register
|
||||
through memory using this hypercall.
|
||||
|
||||
5. KVM_HC_KICK_CPU
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to wakeup a vcpu from HLT state
|
||||
Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
|
||||
kernel mode for an event to occur (ex: a spinlock to become available) can
|
||||
execute HLT instruction once it has busy-waited for more than a threshold
|
||||
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
||||
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
||||
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
||||
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
||||
is used in the hypercall for future use.
|
||||
------------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to wakeup a vcpu from HLT state
|
||||
:Usage example:
|
||||
A vcpu of a paravirtualized guest that is busywaiting in guest
|
||||
kernel mode for an event to occur (ex: a spinlock to become available) can
|
||||
execute HLT instruction once it has busy-waited for more than a threshold
|
||||
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
||||
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
||||
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
||||
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
||||
is used in the hypercall for future use.
|
||||
|
||||
|
||||
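A guest-side sketch of issuing this hypercall is shown below. It assumes the conventional x86 KVM hypercall ABI (number in RAX, a0/a1 in RBX/RCX, result in RAX) and the KVM_HC_KICK_CPU constant from <linux/kvm_para.h>; this is essentially what the kernel's kvm_hypercall2() helper does::

  #include <linux/kvm_para.h>      /* KVM_HC_KICK_CPU */

  /* Wake the vCPU with APIC ID 'apicid' that went to sleep via HLT. */
  static inline long kvm_kick_vcpu(unsigned long apicid)
  {
          long ret;

          asm volatile("vmcall"
                       : "=a" (ret)
                       : "a" (KVM_HC_KICK_CPU),
                         "b" (0UL),           /* a0: reserved for future use */
                         "c" (apicid)         /* a1: APIC ID to wake up */
                       : "memory");
          return ret;
  }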
6. KVM_HC_CLOCK_PAIRING
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to synchronize host and guest clocks.
|
||||
-----------------------
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to synchronize host and guest clocks.
|
||||
|
||||
Usage:
|
||||
|
||||
a0: guest physical address where host copies
|
||||
@ -101,6 +114,8 @@ a0: guest physical address where host copies
|
||||
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
|
||||
is supported (corresponding to the host's CLOCK_REALTIME clock).
|
||||
|
||||
::
|
||||
|
||||
struct kvm_clock_pairing {
|
||||
__s64 sec;
|
||||
__s64 nsec;
|
||||
@ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
|
||||
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
|
||||
|
||||
6. KVM_HC_SEND_IPI
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Send IPIs to multiple vCPUs.
|
||||
------------------
|
||||
|
||||
a0: lower part of the bitmap of destination APIC IDs
|
||||
a1: higher part of the bitmap of destination APIC IDs
|
||||
a2: the lowest APIC ID in bitmap
|
||||
a3: APIC ICR
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Send IPIs to multiple vCPUs.
|
||||
|
||||
- a0: lower part of the bitmap of destination APIC IDs
|
||||
- a1: higher part of the bitmap of destination APIC IDs
|
||||
- a2: the lowest APIC ID in bitmap
|
||||
- a3: APIC ICR
|
||||
|
||||
The hypercall lets a guest send multicast IPIs, with at most 128
|
||||
destinations per hypercall in 64-bit mode and 64 vCPUs per
|
||||
@ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on.
|
||||
Returns the number of CPUs to which the IPIs were delivered successfully.
|
||||
|
||||
7. KVM_HC_SCHED_YIELD
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
||||
---------------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
||||
|
||||
a0: destination APIC ID
|
||||
|
||||
Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||
any of the IPI target vCPUs was preempted.
|
||||
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||
any of the IPI target vCPUs was preempted.
|
@ -7,6 +7,22 @@ KVM
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api
|
||||
amd-memory-encryption
|
||||
cpuid
|
||||
halt-polling
|
||||
hypercalls
|
||||
locking
|
||||
mmu
|
||||
msr
|
||||
nested-vmx
|
||||
ppc-pv
|
||||
s390-diag
|
||||
timekeeping
|
||||
vcpu-requests
|
||||
|
||||
review-checklist
|
||||
|
||||
arm/index
|
||||
|
||||
devices/index
|
||||
|
243
Documentation/virt/kvm/locking.rst
Normal file
@ -0,0 +1,243 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
KVM Lock Overview
|
||||
=================
|
||||
|
||||
1. Acquisition Orders
|
||||
---------------------
|
||||
|
||||
The acquisition orders for mutexes are as follows:
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
||||
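In code, respecting that order looks roughly like the sketch below (the function is hypothetical; the point is only that kvm->lock is the outer mutex and vcpu->mutex is taken inside it, with both released in reverse order)::

  #include <linux/kvm_host.h>

  static void touch_vcpu_and_vm(struct kvm *kvm, struct kvm_vcpu *vcpu)
  {
          mutex_lock(&kvm->lock);          /* outermost */
          mutex_lock(&vcpu->mutex);        /* allowed: taken inside kvm->lock */

          /* ... operate on VM-wide and per-vCPU state ... */

          mutex_unlock(&vcpu->mutex);
          mutex_unlock(&kvm->lock);
  }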
2. Exception
|
||||
------------
|
||||
|
||||
Fast page fault:
|
||||
|
||||
Fast page fault is the fast path which fixes the guest page fault out of
|
||||
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
||||
following two cases:
|
||||
|
||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||
restore the saved R/X bits. This is described in more detail later below.
|
||||
|
||||
2. Write-Protection: The SPTE is present and the fault is
|
||||
caused by write-protect. That means we just need to change the W bit of
|
||||
the spte.
|
||||
|
||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||
SPTE_MMU_WRITEABLE bit on the spte:
|
||||
|
||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||
page write-protection.
|
||||
|
||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||
is safe because any change to these bits can be detected by cmpxchg.
|
||||
|
||||
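As a sketch of that lockless update (names simplified; the real logic lives in KVM's mmu code and uses the kernel's spte masks), the W bit is only installed if the spte is still exactly the value that was read, so any concurrent change makes the cmpxchg fail and the fault is retried::

  #include <linux/types.h>
  #include <linux/atomic.h>

  #define SPTE_W_BIT      (1ULL << 1)      /* illustrative writable bit */

  /* Returns true if the writable bit was installed atomically; false means
   * something changed the spte under us and the fault must be retried. */
  static bool fast_pf_make_writable(u64 *sptep, u64 old_spte)
  {
          return cmpxchg64(sptep, old_spte, old_spte | SPTE_W_BIT) == old_spte;
  }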
But we need to carefully check these cases:
|
||||
|
||||
1) The mapping from gfn to pfn
|
||||
|
||||
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
||||
is not changed during cmpxchg. This is an ABA problem; for example, the following case
|
||||
will happen:
|
||||
|
||||
+------------------------------------------------------------------------+
|
||||
| At the beginning:: |
|
||||
| |
|
||||
| gpte = gfn1 |
|
||||
| gfn1 is mapped to pfn1 on host |
|
||||
| spte is the shadow page table entry corresponding with gpte and |
|
||||
| spte = pfn1 |
|
||||
+------------------------------------------------------------------------+
|
||||
| On fast page fault path: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| CPU 0: | CPU 1: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: | |
|
||||
| | |
|
||||
| old_spte = *spte; | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| | pfn1 is swapped out:: |
|
||||
| | |
|
||||
| | spte = 0; |
|
||||
| | |
|
||||
| | pfn1 is re-alloced for gfn2. |
|
||||
| | |
|
||||
| | gpte is changed to point to |
|
||||
| | gfn2 by the guest:: |
|
||||
| | |
|
||||
| | spte = pfn1; |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: |
|
||||
| |
|
||||
| if (cmpxchg(spte, old_spte, old_spte+W) |
|
||||
| mark_page_dirty(vcpu->kvm, gfn1) |
|
||||
| OOPS!!! |
|
||||
+------------------------------------------------------------------------+
|
||||
|
||||
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
||||
|
||||
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
||||
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
||||
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
||||
|
||||
- We have held the refcount of pfn that means the pfn can not be freed and
|
||||
be reused for another gfn.
|
||||
- The pfn is writable that means it can not be shared between different gfns
|
||||
by KSM.
|
||||
|
||||
Then, we can ensure the dirty bitmap is correctly set for a gfn.
|
||||
|
||||
Currently, to simplify things, we disable fast page fault for
|
||||
indirect shadow pages.
|
||||
|
||||
2) Dirty bit tracking
|
||||
|
||||
In the original code, the spte can be fast updated (non-atomically) if the
|
||||
spte is read-only and the Accessed bit has already been set since the
|
||||
Accessed bit and Dirty bit can not be lost.
|
||||
|
||||
But it is not true after fast page fault since the spte can be marked
|
||||
writable between reading spte and updating spte, as in the following case:
|
||||
|
||||
+------------------------------------------------------------------------+
|
||||
| At the beginning:: |
|
||||
| |
|
||||
| spte.W = 0 |
|
||||
| spte.Accessed = 1 |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| CPU 0: | CPU 1: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| In mmu_spte_clear_track_bits():: | |
|
||||
| | |
|
||||
| old_spte = *spte; | |
|
||||
| | |
|
||||
| | |
|
||||
| /* 'if' condition is satisfied. */| |
|
||||
| if (old_spte.Accessed == 1 && | |
|
||||
| old_spte.W == 0) | |
|
||||
| spte = 0ull; | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| | on fast page fault path:: |
|
||||
| | |
|
||||
| | spte.W = 1 |
|
||||
| | |
|
||||
| | memory write on the spte:: |
|
||||
| | |
|
||||
| | spte.Dirty = 1 |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: | |
|
||||
| | |
|
||||
| else | |
|
||||
| old_spte = xchg(spte, 0ull) | |
|
||||
| if (old_spte.Accessed == 1) | |
|
||||
| kvm_set_pfn_accessed(spte.pfn);| |
|
||||
| if (old_spte.Dirty == 1) | |
|
||||
| kvm_set_pfn_dirty(spte.pfn); | |
|
||||
| OOPS!!! | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
|
||||
The Dirty bit is lost in this case.
|
||||
|
||||
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
||||
if it can be updated out of mmu-lock; see spte_has_volatile_bits(). This means
|
||||
the spte is always atomically updated in this case.
|
||||
|
||||
3) flush tlbs due to spte updated
|
||||
|
||||
If the spte is updated from writable to readonly, we should flush all TLBs,
|
||||
otherwise rmap_write_protect will find a read-only spte, even though the
|
||||
writable spte might be cached on a CPU's TLB.
|
||||
|
||||
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
||||
fast page fault path. In order to easily audit the path, we check whether TLBs need
|
||||
to be flushed for this reason in mmu_spte_update(), since this is a common
|
||||
function to update spte (present -> present).
|
||||
|
||||
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
||||
atomically update the spte, and the race caused by fast page fault can be avoided.
|
||||
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
||||
|
||||
Lockless Access Tracking:
|
||||
|
||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||
a fault is generated and the fast page fault mechanism described above is used
|
||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||
the PTE is marked for access tracking and during restoration to the Present
|
||||
state, the W bit is set depending on whether or not it was a write access. If
|
||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||
which time it will be set using the Dirty tracking mechanism described above.
|
||||
|
||||
3. Reference
|
||||
------------
|
||||
|
||||
:Name: kvm_lock
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - vm_list
|
||||
|
||||
:Name: kvm_count_lock
|
||||
:Type: raw_spinlock_t
|
||||
:Arch: any
|
||||
:Protects: - hardware virtualization enable/disable
|
||||
:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
||||
migration.
|
||||
|
||||
:Name: kvm_arch::tsc_write_lock
|
||||
:Type: raw_spinlock
|
||||
:Arch: x86
|
||||
:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
||||
- tsc offset in vmcb
|
||||
:Comment: 'raw' because updating the tsc offsets must not be preempted.
|
||||
|
||||
:Name: kvm->mmu_lock
|
||||
:Type: spinlock_t
|
||||
:Arch: any
|
||||
:Protects: -shadow page/shadow tlb entry
|
||||
:Comment: it is a spinlock since it is used in mmu notifier.
|
||||
|
||||
:Name: kvm->srcu
|
||||
:Type: srcu lock
|
||||
:Arch: any
|
||||
:Protects: - kvm->memslots
|
||||
- kvm->buses
|
||||
:Comment: The srcu read lock must be held while accessing memslots (e.g.
|
||||
when using gfn_to_* functions) and while accessing in-kernel
|
||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||
if it is needed by multiple functions.
|
||||
|
||||
:Name: blocked_vcpu_on_cpu_lock
|
||||
:Type: spinlock_t
|
||||
:Arch: x86
|
||||
:Protects: blocked_vcpu_on_cpu
|
||||
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||
When VT-d posted-interrupts is supported and the VM has assigned
|
||||
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
||||
wakeup notification event since external interrupts from the
|
||||
assigned devices happens, we will find the vCPU on the list to
|
||||
wakeup.
|
@ -1,215 +0,0 @@
|
||||
KVM Lock Overview
|
||||
=================
|
||||
|
||||
1. Acquisition Orders
|
||||
---------------------
|
||||
|
||||
The acquisition orders for mutexes are as follows:
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
||||
2: Exception
|
||||
------------
|
||||
|
||||
Fast page fault:
|
||||
|
||||
Fast page fault is the fast path which fixes the guest page fault out of
|
||||
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
||||
following two cases:
|
||||
|
||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||
restore the saved R/X bits. This is described in more detail later below.
|
||||
|
||||
2. Write-Protection: The SPTE is present and the fault is
|
||||
caused by write-protect. That means we just need to change the W bit of the
|
||||
spte.
|
||||
|
||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||
SPTE_MMU_WRITEABLE bit on the spte:
|
||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||
page write-protection.
|
||||
|
||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||
is safe because whenever changing these bits can be detected by cmpxchg.
|
||||
|
||||
But we need carefully check these cases:
|
||||
1): The mapping from gfn to pfn
|
||||
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
||||
is not changed during cmpxchg. This is a ABA problem, for example, below case
|
||||
will happen:
|
||||
|
||||
At the beginning:
|
||||
gpte = gfn1
|
||||
gfn1 is mapped to pfn1 on host
|
||||
spte is the shadow page table entry corresponding with gpte and
|
||||
spte = pfn1
|
||||
|
||||
VCPU 0 VCPU0
|
||||
on fast page fault path:
|
||||
|
||||
old_spte = *spte;
|
||||
pfn1 is swapped out:
|
||||
spte = 0;
|
||||
|
||||
pfn1 is re-alloced for gfn2.
|
||||
|
||||
gpte is changed to point to
|
||||
gfn2 by the guest:
|
||||
spte = pfn1;
|
||||
|
||||
if (cmpxchg(spte, old_spte, old_spte+W)
|
||||
mark_page_dirty(vcpu->kvm, gfn1)
|
||||
OOPS!!!
|
||||
|
||||
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
||||
|
||||
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
||||
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
||||
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
||||
- We have held the refcount of pfn that means the pfn can not be freed and
|
||||
be reused for another gfn.
|
||||
- The pfn is writable that means it can not be shared between different gfns
|
||||
by KSM.
|
||||
|
||||
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
|
||||
|
||||
Currently, to simplify the whole things, we disable fast page fault for
|
||||
indirect shadow page.
|
||||
|
||||
2): Dirty bit tracking
|
||||
In the origin code, the spte can be fast updated (non-atomically) if the
|
||||
spte is read-only and the Accessed bit has already been set since the
|
||||
Accessed bit and Dirty bit can not be lost.
|
||||
|
||||
But it is not true after fast page fault since the spte can be marked
|
||||
writable between reading spte and updating spte. Like below case:
|
||||
|
||||
At the beginning:
|
||||
spte.W = 0
|
||||
spte.Accessed = 1
|
||||
|
||||
VCPU 0 VCPU0
|
||||
In mmu_spte_clear_track_bits():
|
||||
|
||||
old_spte = *spte;
|
||||
|
||||
/* 'if' condition is satisfied. */
|
||||
if (old_spte.Accessed == 1 &&
|
||||
old_spte.W == 0)
|
||||
spte = 0ull;
|
||||
on fast page fault path:
|
||||
spte.W = 1
|
||||
memory write on the spte:
|
||||
spte.Dirty = 1
|
||||
|
||||
|
||||
else
|
||||
old_spte = xchg(spte, 0ull)
|
||||
|
||||
|
||||
if (old_spte.Accessed == 1)
|
||||
kvm_set_pfn_accessed(spte.pfn);
|
||||
if (old_spte.Dirty == 1)
|
||||
kvm_set_pfn_dirty(spte.pfn);
|
||||
OOPS!!!
|
||||
|
||||
The Dirty bit is lost in this case.
|
||||
|
||||
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
||||
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
|
||||
the spte is always atomically updated in this case.
|
||||
|
||||
3): flush tlbs due to spte updated
|
||||
If the spte is updated from writable to readonly, we should flush all TLBs,
|
||||
otherwise rmap_write_protect will find a read-only spte, even though the
|
||||
writable spte might be cached on a CPU's TLB.
|
||||
|
||||
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
||||
fast page fault path, in order to easily audit the path, we see if TLBs need
|
||||
be flushed caused by this reason in mmu_spte_update() since this is a common
|
||||
function to update spte (present -> present).
|
||||
|
||||
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
||||
atomically update the spte, the race caused by fast page fault can be avoided,
|
||||
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
||||
|
||||
Lockless Access Tracking:
|
||||
|
||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||
a fault is generated and the fast page fault mechanism described above is used
|
||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||
the PTE is marked for access tracking and during restoration to the Present
|
||||
state, the W bit is set depending on whether or not it was a write access. If
|
||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||
which time it will be set using the Dirty tracking mechanism described above.
|
||||
|
||||
3. Reference
|
||||
------------
|
||||
|
||||
Name: kvm_lock
|
||||
Type: mutex
|
||||
Arch: any
|
||||
Protects: - vm_list
|
||||
|
||||
Name: kvm_count_lock
|
||||
Type: raw_spinlock_t
|
||||
Arch: any
|
||||
Protects: - hardware virtualization enable/disable
|
||||
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
||||
migration.
|
||||
|
||||
Name: kvm_arch::tsc_write_lock
|
||||
Type: raw_spinlock
|
||||
Arch: x86
|
||||
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
||||
- tsc offset in vmcb
|
||||
Comment: 'raw' because updating the tsc offsets must not be preempted.
|
||||
|
||||
Name: kvm->mmu_lock
|
||||
Type: spinlock_t
|
||||
Arch: any
|
||||
Protects: -shadow page/shadow tlb entry
|
||||
Comment: it is a spinlock since it is used in mmu notifier.
|
||||
|
||||
Name: kvm->srcu
|
||||
Type: srcu lock
|
||||
Arch: any
|
||||
Protects: - kvm->memslots
|
||||
- kvm->buses
|
||||
Comment: The srcu read lock must be held while accessing memslots (e.g.
|
||||
when using gfn_to_* functions) and while accessing in-kernel
|
||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||
if it is needed by multiple functions.
|
||||
|
||||
Name: blocked_vcpu_on_cpu_lock
|
||||
Type: spinlock_t
|
||||
Arch: x86
|
||||
Protects: blocked_vcpu_on_cpu
|
||||
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||
When VT-d posted-interrupts is supported and the VM has assigned
|
||||
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
||||
wakeup notification event since external interrupts from the
|
||||
assigned devices happens, we will find the vCPU on the list to
|
||||
wakeup.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================
|
||||
The x86 kvm shadow mmu
|
||||
======================
|
||||
|
||||
@ -7,27 +10,37 @@ physical addresses to host physical addresses.
|
||||
|
||||
The mmu code attempts to satisfy the following requirements:
|
||||
|
||||
- correctness: the guest should not be able to determine that it is running
|
||||
- correctness:
|
||||
the guest should not be able to determine that it is running
|
||||
on an emulated mmu except for timing (we attempt to comply
|
||||
with the specification, not emulate the characteristics of
|
||||
a particular implementation such as tlb size)
|
||||
- security: the guest must not be able to touch host memory not assigned
|
||||
- security:
|
||||
the guest must not be able to touch host memory not assigned
|
||||
to it
|
||||
- performance: minimize the performance penalty imposed by the mmu
|
||||
- scaling: need to scale to large memory and large vcpu guests
|
||||
- hardware: support the full range of x86 virtualization hardware
|
||||
- integration: Linux memory management code must be in control of guest memory
|
||||
- performance:
|
||||
minimize the performance penalty imposed by the mmu
|
||||
- scaling:
|
||||
need to scale to large memory and large vcpu guests
|
||||
- hardware:
|
||||
support the full range of x86 virtualization hardware
|
||||
- integration:
|
||||
Linux memory management code must be in control of guest memory
|
||||
so that swapping, page migration, page merging, transparent
|
||||
hugepages, and similar features work without change
|
||||
- dirty tracking: report writes to guest memory to enable live migration
|
||||
- dirty tracking:
|
||||
report writes to guest memory to enable live migration
|
||||
and framebuffer-based displays
|
||||
- footprint: keep the amount of pinned kernel memory low (most memory
|
||||
- footprint:
|
||||
keep the amount of pinned kernel memory low (most memory
|
||||
should be shrinkable)
|
||||
- reliability: avoid multipage or GFP_ATOMIC allocations
|
||||
- reliability:
|
||||
avoid multipage or GFP_ATOMIC allocations
|
||||
|
||||
Acronyms
|
||||
========
|
||||
|
||||
==== ====================================================================
|
||||
pfn host page frame number
|
||||
hpa host physical address
|
||||
hva host virtual address
|
||||
@ -41,6 +54,7 @@ pte page table entry (used also to refer generically to paging structure
|
||||
gpte guest pte (referring to gfns)
|
||||
spte shadow pte (referring to pfns)
|
||||
tdp two dimensional paging (vendor neutral term for NPT and EPT)
|
||||
==== ====================================================================
|
||||
|
||||
Virtual and real hardware supported
|
||||
===================================
|
||||
@ -90,11 +104,13 @@ Events
|
||||
The mmu is driven by events, some from the guest, some from the host.
|
||||
|
||||
Guest generated events:
|
||||
|
||||
- writes to control registers (especially cr3)
|
||||
- invlpg/invlpga instruction execution
|
||||
- access to missing or protected translations
|
||||
|
||||
Host generated events:
|
||||
|
||||
- changes in the gpa->hpa translation (either through gpa->hva changes or
|
||||
through hva->hpa changes)
|
||||
- memory pressure (the shrinker)
|
||||
@ -117,16 +133,19 @@ Leaf ptes point at guest pages.
|
||||
The following table shows translations encoded by leaf ptes, with higher-level
|
||||
translations in parentheses:
|
||||
|
||||
Non-nested guests:
|
||||
Non-nested guests::
|
||||
|
||||
nonpaging: gpa->hpa
|
||||
paging: gva->gpa->hpa
|
||||
paging, tdp: (gva->)gpa->hpa
|
||||
Nested guests:
|
||||
|
||||
Nested guests::
|
||||
|
||||
non-tdp: ngva->gpa->hpa (*)
|
||||
tdp: (ngva->)ngpa->gpa->hpa
|
||||
|
||||
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
||||
tables if npt is not present
|
||||
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
||||
tables if npt is not present
|
||||
|
||||
Shadow pages contain the following information:
|
||||
role.level:
|
||||
@ -291,28 +310,41 @@ Handling a page fault is performed as follows:
|
||||
|
||||
- if the RSV bit of the error code is set, the page fault is caused by guest
|
||||
accessing MMIO and cached MMIO information is available.
|
||||
|
||||
- walk shadow page table
|
||||
- check for valid generation number in the spte (see "Fast invalidation of
|
||||
MMIO sptes" below)
|
||||
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
|
||||
vcpu->arch.mmio_gfn, and call the emulator
|
||||
|
||||
- If both P bit and R/W bit of error code are set, this could possibly
|
||||
be handled as a "fast page fault" (fixed without taking the MMU lock). See
|
||||
the description in Documentation/virt/kvm/locking.txt.
|
||||
|
||||
- if needed, walk the guest page tables to determine the guest translation
|
||||
(gva->gpa or ngpa->gpa)
|
||||
|
||||
- if permissions are insufficient, reflect the fault back to the guest
|
||||
|
||||
- determine the host page
|
||||
|
||||
- if this is an mmio request, there is no host page; cache the info to
|
||||
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
|
||||
|
||||
- walk the shadow page table to find the spte for the translation,
|
||||
instantiating missing intermediate page tables as necessary
|
||||
|
||||
- If this is an mmio request, cache the mmio info to the spte and set some
|
||||
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
|
||||
|
||||
- try to unsynchronize the page
|
||||
|
||||
- if successful, we can let the guest continue and modify the gpte
|
||||
|
||||
- emulate the instruction
|
||||
|
||||
- if failed, unshadow the page and let the guest continue
|
||||
|
||||
- update any translations that were modified by the instruction
|
||||
|
||||
invlpg handling:
|
||||
@ -324,10 +356,12 @@ invlpg handling:
|
||||
Guest control register updates:
|
||||
|
||||
- mov to cr3
|
||||
|
||||
- look up new shadow roots
|
||||
- synchronize newly reachable shadow pages
|
||||
|
||||
- mov to cr0/cr4/efer
|
||||
|
||||
- set up mmu context for new paging mode
|
||||
- look up new shadow roots
|
||||
- synchronize newly reachable shadow pages
|
||||
@ -358,6 +392,7 @@ on fault type:
|
||||
(user write faults generate a #PF)
|
||||
|
||||
In the first case there are two additional complications:
|
||||
|
||||
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
|
||||
the kernel may now execute it. We handle this by also setting spte.nx.
|
||||
If we get a user fetch or read fault, we'll change spte.u=1 and
|
||||
@ -446,4 +481,3 @@ Further reading
|
||||
|
||||
- NPT presentation from KVM Forum 2008
|
||||
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
|
||||
|
@ -1,6 +1,10 @@
|
||||
KVM-specific MSRs.
|
||||
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
||||
=====================================================
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
KVM-specific MSRs
|
||||
=================
|
||||
|
||||
:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
||||
|
||||
KVM makes use of some custom MSRs to service some requests.
|
||||
|
||||
@ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from
|
||||
but they are deprecated and their use is discouraged.
|
||||
|
||||
Custom MSR list
|
||||
--------
|
||||
---------------
|
||||
|
||||
The current supported Custom MSR list is:
|
||||
|
||||
MSR_KVM_WALL_CLOCK_NEW:
	0x4b564d00

data:
	4-byte aligned physical address of a memory area which must be
	in guest RAM. This memory is expected to hold a copy of the following
	structure::

		struct pvclock_wall_clock {
			u32   version;
			u32   sec;
			u32   nsec;
		} __attribute__((__packed__));

	whose data will be filled in by the hypervisor. The hypervisor is only
	guaranteed to update this data at the moment of MSR write.
	Users that want to reliably query this information more than once have
	to write more than once to this MSR. Fields have the following meanings:

	version:
		guest has to check version before and after grabbing
		time information and check that they are both equal and even.
		An odd version indicates an in-progress update.

	sec:
		number of seconds for wallclock at time of boot.

	nsec:
		number of nanoseconds for wallclock at time of boot.

	In order to get the current wallclock time, the system_time from
	MSR_KVM_SYSTEM_TIME_NEW needs to be added.

@ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00

	Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
	leaf prior to usage.
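
	Purely as an illustration, a guest could fetch the boot wall clock
	roughly as in the minimal C sketch below. The wrmsrl() helper and the
	guest-physical address of the structure are assumed to be provided by
	the guest environment, and memory barriers are omitted for brevity::

		/*
		 * Minimal sketch, assuming a guest environment that provides
		 * wrmsrl() and knows the 4-byte aligned guest-physical
		 * address (wc_gpa) of *wc.
		 */
		#include <stdint.h>

		struct pvclock_wall_clock {
			uint32_t version;
			uint32_t sec;
			uint32_t nsec;
		} __attribute__((__packed__));

		#define MSR_KVM_WALL_CLOCK_NEW	0x4b564d00

		extern void wrmsrl(uint32_t msr, uint64_t value);	/* assumption */

		static void read_wall_clock(volatile struct pvclock_wall_clock *wc,
					    uint64_t wc_gpa,
					    uint32_t *sec, uint32_t *nsec)
		{
			uint32_t v0, v1;

			do {
				/* each write asks the hypervisor to refresh the area */
				wrmsrl(MSR_KVM_WALL_CLOCK_NEW, wc_gpa);
				v0 = wc->version;	/* even once the update is done */
				*sec = wc->sec;
				*nsec = wc->nsec;
				v1 = wc->version;
			} while ((v0 & 1) || v0 != v1);	/* odd or changed: retry */
		}
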
|
||||
MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
MSR_KVM_SYSTEM_TIME_NEW:
|
||||
0x4b564d01
|
||||
|
||||
data: 4-byte aligned physical address of a memory area which must be in
|
||||
data:
|
||||
4-byte aligned physical address of a memory area which must be in
|
||||
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
|
||||
a copy of the following structure:
|
||||
a copy of the following structure::
|
||||
|
||||
struct pvclock_vcpu_time_info {
|
||||
struct pvclock_vcpu_time_info {
|
||||
u32 version;
|
||||
u32 pad0;
|
||||
u64 tsc_timestamp;
|
||||
@ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
s8 tsc_shift;
|
||||
u8 flags;
|
||||
u8 pad[2];
|
||||
} __attribute__((__packed__)); /* 32 bytes */
|
||||
} __attribute__((__packed__)); /* 32 bytes */
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
@ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: guest has to check version before and after grabbing
|
||||
version:
|
||||
guest has to check version before and after grabbing
|
||||
time information and check that they are both equal and even.
|
||||
An odd version indicates an in-progress update.
|
||||
|
||||
tsc_timestamp: the tsc value at the current VCPU at the time
|
||||
tsc_timestamp:
|
||||
the tsc value at the current VCPU at the time
|
||||
of the update of this structure. Guests can subtract this value
|
||||
from current tsc to derive a notion of elapsed time since the
|
||||
structure update.
|
||||
|
||||
system_time: a host notion of monotonic time, including sleep
|
||||
system_time:
|
||||
a host notion of monotonic time, including sleep
|
||||
time at the time this structure was last updated. Unit is
|
||||
nanoseconds.
|
||||
|
||||
tsc_to_system_mul: multiplier to be used when converting
|
||||
tsc_to_system_mul:
|
||||
multiplier to be used when converting
|
||||
tsc-related quantity to nanoseconds
|
||||
|
||||
tsc_shift: shift to be used when converting tsc-related
|
||||
tsc_shift:
|
||||
shift to be used when converting tsc-related
|
||||
quantity to nanoseconds. This shift will ensure that
|
||||
multiplication with tsc_to_system_mul does not overflow.
|
||||
A positive value denotes a left shift, a negative value
|
||||
@ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
|
||||
	The conversion from tsc to nanoseconds involves an additional
	right shift by 32 bits. With this information, guests can
	derive per-CPU time by doing::

		time = (current_tsc - tsc_timestamp)
		if (tsc_shift >= 0)
@ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
		time = (time * tsc_to_system_mul) >> 32
		time = time + system_time
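
	As a worked example of this conversion (together with the version
	check described above), a guest-side helper might look like the
	following sketch; rdtsc() is assumed to be supplied by the guest
	environment and barriers are again omitted::

		#include <stdint.h>

		struct pvclock_vcpu_time_info {
			uint32_t version;
			uint32_t pad0;
			uint64_t tsc_timestamp;
			uint64_t system_time;
			uint32_t tsc_to_system_mul;
			int8_t   tsc_shift;
			uint8_t  flags;
			uint8_t  pad[2];
		} __attribute__((__packed__));

		extern uint64_t rdtsc(void);	/* assumption: current TSC value */

		static uint64_t pvclock_nanoseconds(volatile struct pvclock_vcpu_time_info *ti)
		{
			uint32_t v0, v1;
			uint64_t time;

			do {
				v0 = ti->version;		/* even when stable */
				time = rdtsc() - ti->tsc_timestamp;
				if (ti->tsc_shift >= 0)
					time <<= ti->tsc_shift;
				else
					time >>= -ti->tsc_shift;
				/*
				 * The document's formula; a plain 64-bit multiply is
				 * used here as a simplification and can overflow for
				 * very large TSC deltas.
				 */
				time = (time * ti->tsc_to_system_mul) >> 32;
				time += ti->system_time;
				v1 = ti->version;
			} while ((v0 & 1) || v0 != v1);

			return time;
		}
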
|
||||
flags: bits in this field indicate extended capabilities
|
||||
flags:
|
||||
bits in this field indicate extended capabilities
|
||||
coordinated between the guest and the hypervisor. Availability
|
||||
of specific flags has to be checked in 0x40000001 cpuid leaf.
|
||||
Current flags are:
|
||||
|
||||
flag bit | cpuid bit | meaning
|
||||
-------------------------------------------------------------
|
||||
| | time measures taken across
|
||||
0 | 24 | multiple cpus are guaranteed to
|
||||
| | be monotonic
|
||||
-------------------------------------------------------------
|
||||
| | guest vcpu has been paused by
|
||||
1 | N/A | the host
|
||||
| | See 4.70 in api.txt
|
||||
-------------------------------------------------------------
|
||||
|
||||
+-----------+--------------+----------------------------------+
|
||||
| flag bit | cpuid bit | meaning |
|
||||
+-----------+--------------+----------------------------------+
|
||||
| | | time measures taken across |
|
||||
| 0 | 24 | multiple cpus are guaranteed to |
|
||||
| | | be monotonic |
|
||||
+-----------+--------------+----------------------------------+
|
||||
| | | guest vcpu has been paused by |
|
||||
| 1 | N/A | the host |
|
||||
| | | See 4.70 in api.txt |
|
||||
+-----------+--------------+----------------------------------+
|
||||
|
||||
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
|
||||
MSR_KVM_WALL_CLOCK: 0x11
|
||||
MSR_KVM_WALL_CLOCK:
|
||||
0x11
|
||||
|
||||
data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
||||
data and functioning:
|
||||
same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
||||
|
||||
This MSR falls outside the reserved KVM range and may be removed in the
|
||||
future. Its usage is deprecated.
|
||||
@ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK: 0x11
|
||||
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
MSR_KVM_SYSTEM_TIME: 0x12
|
||||
MSR_KVM_SYSTEM_TIME:
|
||||
0x12
|
||||
|
||||
data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
||||
data and functioning:
|
||||
same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
||||
|
||||
This MSR falls outside the reserved KVM range and may be removed in the
|
||||
future. Its usage is deprecated.
|
||||
@ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
||||
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
The suggested algorithm for detecting kvmclock presence is then:
|
||||
The suggested algorithm for detecting kvmclock presence is then::
|
||||
|
||||
if (!kvm_para_available()) /* refer to cpuid.txt */
|
||||
return NON_PRESENT;
|
||||
@ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
||||
} else
|
||||
return NON_PRESENT;
|
||||
|
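
	One concrete way for a guest to perform the availability checks
	referred to throughout this list is to look at the KVM CPUID leaves
	directly (0x40000000 carries the "KVMKVMKVM" hypervisor signature,
	0x40000001 the feature bits). The sketch below is a simplified
	variant of the detection algorithm above and assumes the __cpuid
	macro from the GCC/Clang <cpuid.h> header; it is only meaningful
	when actually running as a KVM guest::

		#include <cpuid.h>
		#include <stdint.h>
		#include <string.h>

		static int kvmclock_present(uint32_t *msr_wall, uint32_t *msr_system)
		{
			uint32_t eax, ebx, ecx, edx;
			char sig[13];

			__cpuid(0x40000000, eax, ebx, ecx, edx);   /* hypervisor signature */
			memcpy(sig + 0, &ebx, 4);
			memcpy(sig + 4, &ecx, 4);
			memcpy(sig + 8, &edx, 4);
			sig[12] = '\0';
			if (strcmp(sig, "KVMKVMKVM"))
				return 0;                          /* not KVM */

			__cpuid(0x40000001, eax, ebx, ecx, edx);   /* feature bits in eax */
			if (eax & (1 << 3)) {                      /* new kvmclock MSRs */
				*msr_wall = 0x4b564d00;            /* MSR_KVM_WALL_CLOCK_NEW */
				*msr_system = 0x4b564d01;          /* MSR_KVM_SYSTEM_TIME_NEW */
				return 1;
			}
			if (eax & (1 << 0)) {                      /* old kvmclock MSRs */
				*msr_wall = 0x11;
				*msr_system = 0x12;
				return 1;
			}
			return 0;
		}
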
||||
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
data: Bits 63-6 hold 64-byte aligned physical address of a
|
||||
MSR_KVM_ASYNC_PF_EN:
|
||||
0x4b564d02
|
||||
|
||||
data:
|
||||
Bits 63-6 hold 64-byte aligned physical address of a
|
||||
64 byte memory area which must be in guest RAM and must be
|
||||
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
||||
when asynchronous page faults are enabled on the vcpu 0 when
|
||||
@ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
Currently type 2 APF will be always delivered on the same vcpu as
|
||||
type 1 was, but guest should not rely on that.
|
||||
|
||||
MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
MSR_KVM_STEAL_TIME:
|
||||
0x4b564d03
|
||||
|
||||
data: 64-byte alignment physical address of a memory area which must be
|
||||
data:
|
||||
64-byte aligned physical address of a memory area which must be
|
||||
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
||||
hold a copy of the following structure:
|
||||
hold a copy of the following structure::
|
||||
|
||||
struct kvm_steal_time {
|
||||
struct kvm_steal_time {
|
||||
__u64 steal;
|
||||
__u32 version;
|
||||
__u32 flags;
|
||||
__u8 preempted;
|
||||
__u8 u8_pad[3];
|
||||
__u32 pad[11];
|
||||
}
|
||||
}
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
@ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: a sequence counter. In other words, guest has to check
|
||||
version:
|
||||
a sequence counter. In other words, guest has to check
|
||||
this field before and after grabbing time information and make
|
||||
sure they are both equal and even. An odd version indicates an
|
||||
in-progress update.
|
||||
|
||||
flags: At this point, always zero. May be used to indicate
|
||||
flags:
|
||||
At this point, always zero. May be used to indicate
|
||||
changes in this structure in the future.
|
||||
|
||||
steal: the amount of time in which this vCPU did not run, in
|
||||
steal:
|
||||
the amount of time in which this vCPU did not run, in
|
||||
nanoseconds. Time during which the vcpu is idle will not be
|
||||
reported as steal time.
|
||||
|
||||
	preempted:
		indicates whether the vCPU that owns this struct is running or
		not. Non-zero values mean the vCPU has been preempted. Zero
		means the vCPU is not preempted. NOTE: it is always zero if the
		hypervisor doesn't support this field.
|
||||
MSR_KVM_EOI_EN: 0x4b564d04
|
||||
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
||||
MSR_KVM_EOI_EN:
|
||||
0x4b564d04
|
||||
|
||||
data:
|
||||
Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
||||
when disabled. Bit 1 is reserved and must be zero. When PV end of
|
||||
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
|
||||
physical address of a 4 byte memory area which must be in guest RAM and
|
||||
@ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04
|
||||
clear it using a single CPU instruction, such as test and clear, or
|
||||
compare and exchange.
|
||||
|
||||
MSR_KVM_POLL_CONTROL: 0x4b564d05
|
||||
MSR_KVM_POLL_CONTROL:
|
||||
0x4b564d05
|
||||
|
||||
Control host-side polling.
|
||||
|
||||
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
||||
data:
|
||||
Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
||||
|
||||
KVM guests can request the host not to poll on HLT, for example if
|
||||
they are performing polling themselves.
|
||||
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========
|
||||
Nested VMX
|
||||
==========
|
||||
|
||||
@ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default
|
||||
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
||||
explicitly enabled, by giving qemu one of the following options:
|
||||
|
||||
-cpu host (emulated CPU has all features of the real CPU)
|
||||
- cpu host (emulated CPU has all features of the real CPU)
|
||||
|
||||
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
- cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
|
||||
|
||||
ABIs
|
||||
@ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions.
|
||||
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
||||
struct shadow_vmcs is ever changed.
|
||||
|
||||
::
|
||||
|
||||
typedef u64 natural_width;
|
||||
struct __packed vmcs12 {
|
||||
/* According to the Intel spec, a VMCS region must start with
|
||||
@ -220,21 +225,21 @@ Authors
|
||||
-------
|
||||
|
||||
These patches were written by:
|
||||
Abel Gordon, abelg <at> il.ibm.com
|
||||
Nadav Har'El, nyh <at> il.ibm.com
|
||||
Orit Wasserman, oritw <at> il.ibm.com
|
||||
Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
- Abel Gordon, abelg <at> il.ibm.com
|
||||
- Nadav Har'El, nyh <at> il.ibm.com
|
||||
- Orit Wasserman, oritw <at> il.ibm.com
|
||||
- Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
- Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
|
||||
With contributions by:
|
||||
Anthony Liguori, aliguori <at> us.ibm.com
|
||||
Mike Day, mdday <at> us.ibm.com
|
||||
Michael Factor, factor <at> il.ibm.com
|
||||
Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
- Anthony Liguori, aliguori <at> us.ibm.com
|
||||
- Mike Day, mdday <at> us.ibm.com
|
||||
- Michael Factor, factor <at> il.ibm.com
|
||||
- Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
|
||||
And valuable reviews by:
|
||||
Avi Kivity, avi <at> redhat.com
|
||||
Gleb Natapov, gleb <at> redhat.com
|
||||
Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
Kevin Tian, kevin.tian <at> intel.com
|
||||
and others.
|
||||
- Avi Kivity, avi <at> redhat.com
|
||||
- Gleb Natapov, gleb <at> redhat.com
|
||||
- Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
- Kevin Tian, kevin.tian <at> intel.com
|
||||
- and others.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================================
|
||||
The PPC KVM paravirtual interface
|
||||
=================================
|
||||
|
||||
@ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions.
|
||||
|
||||
The parameters are as follows:
|
||||
|
||||
======== ================ ================
|
||||
Register IN OUT
|
||||
|
||||
======== ================ ================
|
||||
r0 - volatile
|
||||
r3 1st parameter Return code
|
||||
r4 2nd parameter 1st output value
|
||||
@ -47,6 +51,7 @@ The parameters are as follows:
|
||||
r10 8th parameter 7th output value
|
||||
r11 hypercall number 8th output value
|
||||
r12 - volatile
|
||||
======== ================ ================
|
||||
|
||||
Hypercall definitions are shared in generic code, so the same hypercall numbers
|
||||
apply for x86 and powerpc alike with the exception that each KVM hypercall
|
||||
@ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16).
|
||||
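
For illustration, the vendor-code composition can be captured in a small
helper; the macro names below are invented for this sketch and are not
taken from any kernel header, and the actual hypercall instruction
sequence (supplied by the hypervisor, as discussed above) is not
reproduced here::

	#define KVM_PPC_VENDOR_CODE	(42 << 16)
	#define KVM_PPC_HCALL_NR(nr)	(KVM_PPC_VENDOR_CODE | (nr))

	/*
	 * Per the register table above, KVM_PPC_HCALL_NR(n) would be loaded
	 * into r11, parameters into r3..r10, and the return code comes back
	 * in r3 (0 on success, 12 if not implemented, negative on error).
	 */
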
|
||||
Return codes can be as follows:
|
||||
|
||||
==== =========================
|
||||
Code Meaning
|
||||
|
||||
==== =========================
|
||||
0 Success
|
||||
12 Hypercall not implemented
|
||||
<0 Error
|
||||
==== =========================
|
||||
|
||||
The magic page
|
||||
==============
|
||||
@ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the
|
||||
MMU is enabled. The second parameter indicates the address in real mode, if
|
||||
applicable to the target. For now, we always map the page to -4096. This way we
|
||||
can access it using absolute load and store functions. The following
|
||||
instruction reads the first field of the magic page:
|
||||
instruction reads the first field of the magic page::
|
||||
|
||||
ld rX, -4096(0)
|
||||
|
||||
@ -93,8 +100,10 @@ a bitmap of available features inside the magic page.
|
||||
|
||||
The following enhancements to the magic page are currently available:
|
||||
|
||||
============================ =======================================
|
||||
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
|
||||
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
|
||||
============================ =======================================
|
||||
|
||||
For enhanced features in the magic page, please check for the existence of the
|
||||
feature before using them!
|
||||
@ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior.
|
||||
|
||||
The following bits are safe to be set inside the guest:
|
||||
|
||||
MSR_EE
|
||||
MSR_RI
|
||||
- MSR_EE
|
||||
- MSR_RI
|
||||
|
||||
If any other bit changes in the MSR, please still use mtmsr(d).
|
||||
|
||||
@ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps
|
||||
also act on the shared page. So calling privileged instructions still works as
|
||||
before.
|
||||
|
||||
======================= ================================
|
||||
From To
|
||||
==== ==
|
||||
|
||||
======================= ================================
|
||||
mfmsr rX ld rX, magic_page->msr
|
||||
mfsprg rX, 0 ld rX, magic_page->sprg0
|
||||
mfsprg rX, 1 ld rX, magic_page->sprg1
|
||||
@ -173,7 +182,7 @@ mtsrin rX, rY b <special mtsrin section>
|
||||
|
||||
[BookE only]
|
||||
wrteei [0|1] b <special wrteei section>
|
||||
|
||||
======================= ================================
|
||||
|
||||
Some instructions require more logic to determine what's going on than a load
|
||||
or store instruction can deliver. To enable patching of those, we keep some
|
||||
@ -191,6 +200,7 @@ for example.
|
||||
|
||||
Hypercall ABIs in KVM on PowerPC
|
||||
=================================
|
||||
|
||||
1) KVM hypercalls (ePAPR)
|
||||
|
||||
These are ePAPR compliant hypercall implementation (mentioned above). Even
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
================================
|
||||
Review checklist for kvm patches
|
||||
================================
|
||||
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=============================
|
||||
The s390 DIAGNOSE call on KVM
|
||||
=============================
|
||||
|
||||
@ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies
|
||||
all supported DIAGNOSE calls need to be handled by either KVM or its
|
||||
userspace.
|
||||
|
||||
All DIAGNOSE calls supported by KVM use the RS-a format:
|
||||
All DIAGNOSE calls supported by KVM use the RS-a format::
|
||||
|
||||
--------------------------------------
|
||||
| '83' | R1 | R3 | B2 | D2 |
|
||||
--------------------------------------
|
||||
0 8 12 16 20 31
|
||||
--------------------------------------
|
||||
| '83' | R1 | R3 | B2 | D2 |
|
||||
--------------------------------------
|
||||
0 8 12 16 20 31
|
||||
|
||||
The second-operand address (obtained by the base/displacement calculation)
|
||||
is not used to address data. Instead, bits 48-63 of this address specify
|
@ -1,17 +1,21 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Timekeeping Virtualization for X86-Based Architectures
|
||||
======================================================
|
||||
Timekeeping Virtualization for X86-Based Architectures
|
||||
======================================================
|
||||
|
||||
Zachary Amsden <zamsden@redhat.com>
|
||||
Copyright (c) 2010, Red Hat. All rights reserved.
|
||||
:Author: Zachary Amsden <zamsden@redhat.com>
|
||||
:Copyright: (c) 2010, Red Hat. All rights reserved.
|
||||
|
||||
1) Overview
|
||||
2) Timing Devices
|
||||
3) TSC Hardware
|
||||
4) Virtualization Problems
|
||||
.. Contents
|
||||
|
||||
=========================================================================
|
||||
1) Overview
|
||||
2) Timing Devices
|
||||
3) TSC Hardware
|
||||
4) Virtualization Problems
|
||||
|
||||
1) Overview
|
||||
1. Overview
|
||||
===========
|
||||
|
||||
One of the most complicated parts of the X86 platform, and specifically,
|
||||
the virtualization of this platform is the plethora of timing devices available
|
||||
@ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to
|
||||
timekeeping which may be difficult to find elsewhere, specifically,
|
||||
information relevant to KVM and hardware-based virtualization.
|
||||
|
||||
=========================================================================
|
||||
|
||||
2) Timing Devices
|
||||
2. Timing Devices
|
||||
=================
|
||||
|
||||
First we discuss the basic hardware devices available. TSC and the related
|
||||
KVM clock are special enough to warrant a full exposition and are described in
|
||||
the following section.
|
||||
|
||||
2.1) i8254 - PIT
|
||||
2.1. i8254 - PIT
|
||||
----------------
|
||||
|
||||
One of the first timer devices available is the programmable interrupt timer,
|
||||
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
|
||||
@ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
|
||||
using single or multiple byte access to the I/O ports. There are 6 modes
|
||||
available, but not all modes are available to all timers, as only timer 2
|
||||
has a connected gate input, required for modes 1 and 5. The gate line is
|
||||
controlled by port 61h, bit 0, as illustrated in the following diagram.
|
||||
controlled by port 61h, bit 0, as illustrated in the following diagram::
|
||||
|
||||
-------------- ----------------
|
||||
| | | |
|
||||
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
|
||||
| Clock | | | |
|
||||
-------------- | +->| GATE TIMER 0 |
|
||||
-------------- ----------------
|
||||
| | | |
|
||||
| 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
|
||||
| Clock | | | |
|
||||
-------------- | +->| GATE TIMER 0 |
|
||||
| ----------------
|
||||
|
|
||||
| ----------------
|
||||
@ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram.
|
||||
| | |
|
||||
|------>| CLOCK OUT | ---------> Port 61h, bit 5
|
||||
| | |
|
||||
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
|
||||
Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
|
||||
---------------- _| )--|LPF|---Speaker
|
||||
/ *---- \___/
|
||||
Port 61h, bit 1 -----------------------------------/
|
||||
Port 61h, bit 1 ---------------------------------/
|
||||
|
||||
The timer modes are now described.
|
||||
|
||||
Mode 0: Single Timeout. This is a one-shot software timeout that counts down
|
||||
Mode 0: Single Timeout.
|
||||
This is a one-shot software timeout that counts down
|
||||
when the gate is high (always true for timers 0 and 1). When the count
|
||||
reaches zero, the output goes high.
|
||||
|
||||
Mode 1: Triggered One-shot. The output is initially set high. When the gate
|
||||
Mode 1: Triggered One-shot.
|
||||
The output is initially set high. When the gate
|
||||
line is set high, a countdown is initiated (which does not stop if the gate is
|
||||
lowered), during which the output is set low. When the count reaches zero,
|
||||
the output goes high.
|
||||
|
||||
Mode 2: Rate Generator. The output is initially set high. When the countdown
|
||||
Mode 2: Rate Generator.
|
||||
The output is initially set high. When the countdown
|
||||
reaches 1, the output goes low for one count and then returns high. The value
|
||||
is reloaded and the countdown automatically resumes. If the gate line goes
|
||||
low, the count is halted. If the output is low when the gate is lowered, the
|
||||
output automatically goes high (this only affects timer 2).
|
||||
|
||||
Mode 3: Square Wave. This generates a high / low square wave. The count
|
||||
Mode 3: Square Wave.
|
||||
This generates a high / low square wave. The count
|
||||
determines the length of the pulse, which alternates between high and low
|
||||
when zero is reached. The count only proceeds when gate is high and is
|
||||
automatically reloaded on reaching zero. The count is decremented twice at
|
||||
@ -103,12 +111,14 @@ Mode 3: Square Wave. This generates a high / low square wave. The count
|
||||
values are not observed when reading. This is the intended mode for timer 2,
|
||||
which generates sine-like tones by low-pass filtering the square wave output.
|
||||
|
||||
Mode 4: Software Strobe. After programming this mode and loading the counter,
|
||||
Mode 4: Software Strobe.
|
||||
After programming this mode and loading the counter,
|
||||
the output remains high until the counter reaches zero. Then the output
|
||||
goes low for 1 clock cycle and returns high. The counter is not reloaded.
|
||||
Counting only occurs when gate is high.
|
||||
|
||||
Mode 5: Hardware Strobe. After programming and loading the counter, the
|
||||
Mode 5: Hardware Strobe.
|
||||
After programming and loading the counter, the
|
||||
output remains high. When the gate is raised, a countdown is initiated
|
||||
(which does not stop if the gate is lowered). When the counter reaches zero,
|
||||
the output goes low for 1 clock cycle and then returns high. The counter is
|
||||
@ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting. The
|
||||
command port, 0x43 is used to set the counter and mode for each of the three
|
||||
timers.
|
||||
|
||||
PIT commands, issued to port 0x43, using the following bit encoding:
|
||||
PIT commands, issued to port 0x43, using the following bit encoding::
|
||||
|
||||
Bit 7-4: Command (See table below)
|
||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||
Bit 0 : Binary (0) / BCD (1)
|
||||
Bit 7-4: Command (See table below)
|
||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||
Bit 0 : Binary (0) / BCD (1)
|
||||
|
||||
Command table:
|
||||
Command table::
|
||||
|
||||
0000 - Latch Timer 0 count for port 0x40
|
||||
0000 - Latch Timer 0 count for port 0x40
|
||||
sample and hold the count to be read in port 0x40;
|
||||
additional commands ignored until counter is read;
|
||||
mode bits ignored.
|
||||
|
||||
0001 - Set Timer 0 LSB mode for port 0x40
|
||||
0001 - Set Timer 0 LSB mode for port 0x40
|
||||
set timer to read LSB only and force MSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0010 - Set Timer 0 MSB mode for port 0x40
|
||||
0010 - Set Timer 0 MSB mode for port 0x40
|
||||
set timer to read MSB only and force LSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||
set timer to read / write LSB first, then MSB;
|
||||
mode bits set timer mode
|
||||
|
||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||
|
||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
|
||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
|
||||
|
||||
1101 - General counter latch
|
||||
1101 - General counter latch
|
||||
Latch combination of counters into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
Bit 1 = Counter 0
|
||||
Bit 0 = Unused
|
||||
|
||||
1110 - Latch timer status
|
||||
1110 - Latch timer status
|
||||
Latch combination of counter mode into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
@ -177,7 +187,8 @@ Command table:
|
||||
Bit 3-1 = Mode
|
||||
Bit 0 = Binary (0) / BCD mode (1)
|
||||
|
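
For illustration, programming timer 0 as a rate generator with the command
encoding above could look like the following minimal sketch. The outb()
helper (value first, port second, in the style of a typical kernel helper)
is an assumption and must be provided by the platform, and the code needs
I/O port access privileges::

	#include <stdint.h>

	#define PIT_BASE_HZ	1193182u	/* 1.193182 MHz input clock */
	#define PIT_CH0_PORT	0x40
	#define PIT_CMD_PORT	0x43

	extern void outb(uint8_t value, uint16_t port);	/* assumption */

	/* hz must be at least ~19 so the divisor fits in 16 bits */
	static void pit_set_rate_generator(unsigned int hz)
	{
		uint16_t divisor = (uint16_t)(PIT_BASE_HZ / hz);

		/*
		 * 0x34 = command 0011 (set timer 0, 16-bit LSB-then-MSB access),
		 *        mode 010 (rate generator), binary counting.
		 */
		outb(0x34, PIT_CMD_PORT);
		outb(divisor & 0xff, PIT_CH0_PORT);		/* LSB first */
		outb((divisor >> 8) & 0xff, PIT_CH0_PORT);	/* then MSB */
	}

With hz = 100 the divisor is 11931, which yields a tick of roughly
100.007 Hz.
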
||||
2.2) RTC
|
||||
2.2. RTC
|
||||
--------
|
||||
|
||||
The second device which was available in the original PC was the MC146818 real
|
||||
time clock. The original device is now obsolete, and usually emulated by the
|
||||
@ -201,21 +212,21 @@ in progress, as indicated in the status register.
|
||||
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
|
||||
programmed to a 32kHz divider if the RTC is to count seconds.
|
||||
|
||||
This is the RAM map originally used for the RTC/CMOS:
|
||||
This is the RAM map originally used for the RTC/CMOS::
|
||||
|
||||
Location Size Description
|
||||
------------------------------------------
|
||||
00h byte Current second (BCD)
|
||||
01h byte Seconds alarm (BCD)
|
||||
02h byte Current minute (BCD)
|
||||
03h byte Minutes alarm (BCD)
|
||||
04h byte Current hour (BCD)
|
||||
05h byte Hours alarm (BCD)
|
||||
06h byte Current day of week (BCD)
|
||||
07h byte Current day of month (BCD)
|
||||
08h byte Current month (BCD)
|
||||
09h byte Current year (BCD)
|
||||
0Ah byte Register A
|
||||
Location Size Description
|
||||
------------------------------------------
|
||||
00h byte Current second (BCD)
|
||||
01h byte Seconds alarm (BCD)
|
||||
02h byte Current minute (BCD)
|
||||
03h byte Minutes alarm (BCD)
|
||||
04h byte Current hour (BCD)
|
||||
05h byte Hours alarm (BCD)
|
||||
06h byte Current day of week (BCD)
|
||||
07h byte Current day of month (BCD)
|
||||
08h byte Current month (BCD)
|
||||
09h byte Current year (BCD)
|
||||
0Ah byte Register A
|
||||
bit 7 = Update in progress
|
||||
bit 6-4 = Divider for clock
|
||||
000 = 4.194 MHz
|
||||
@ -234,7 +245,7 @@ Location Size Description
|
||||
1101 = 125 mS
|
||||
1110 = 250 mS
|
||||
1111 = 500 mS
|
||||
0Bh byte Register B
|
||||
0Bh byte Register B
|
||||
bit 7 = Run (0) / Halt (1)
|
||||
bit 6 = Periodic interrupt enable
|
||||
bit 5 = Alarm interrupt enable
|
||||
@ -243,19 +254,20 @@ Location Size Description
|
||||
bit 2 = BCD calendar (0) / Binary (1)
|
||||
bit 1 = 12-hour mode (0) / 24-hour mode (1)
|
||||
bit 0 = 0 (DST off) / 1 (DST enabled)
|
||||
0Ch	byte	Register C (read only)
|
||||
0Ch	byte	Register C (read only)
|
||||
bit 7 = interrupt request flag (IRQF)
|
||||
bit 6 = periodic interrupt flag (PF)
|
||||
bit 5 = alarm interrupt flag (AF)
|
||||
bit 4 = update interrupt flag (UF)
|
||||
bit 3-0 = reserved
|
||||
0Dh	byte	Register D (read only)
|
||||
0Dh	byte	Register D (read only)
|
||||
bit 7 = RTC has power
|
||||
bit 6-0 = reserved
|
||||
32h byte Current century BCD (*)
|
||||
32h byte Current century BCD (*)
|
||||
(*) location vendor specific and now determined from ACPI global tables
|
||||
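
For illustration, reading the current second with the register map above
might look like the sketch below; the outb()/inb() port helpers are
assumptions, and locking against other CMOS users as well as NMI-disable
handling are deliberately ignored::

	#include <stdint.h>

	#define RTC_INDEX_PORT	0x70
	#define RTC_DATA_PORT	0x71

	extern void outb(uint8_t value, uint16_t port);	/* assumption */
	extern uint8_t inb(uint16_t port);		/* assumption */

	static uint8_t cmos_read(uint8_t reg)
	{
		outb(reg, RTC_INDEX_PORT);
		return inb(RTC_DATA_PORT);
	}

	static uint8_t rtc_read_seconds(void)
	{
		uint8_t sec, reg_b;

		/* wait until no update is in progress (register A, bit 7) */
		while (cmos_read(0x0a) & 0x80)
			;
		sec = cmos_read(0x00);		/* current second */
		reg_b = cmos_read(0x0b);

		/* register B bit 2: 0 = BCD calendar, 1 = binary */
		if (!(reg_b & 0x04))
			sec = (sec >> 4) * 10 + (sec & 0x0f);
		return sec;
	}
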
|
||||
2.3) APIC
|
||||
2.3. APIC
|
||||
---------
|
||||
|
||||
On Pentium and later processors, an on-board timer is available to each CPU
|
||||
as part of the Advanced Programmable Interrupt Controller. The APIC is
|
||||
@ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable
|
||||
of one-shot or periodic operation, and is based on the bus clock divided down
|
||||
by the programmable divider register.
|
||||
|
||||
2.4) HPET
|
||||
2.4. HPET
|
||||
---------
|
||||
|
||||
HPET is quite complex, and was originally intended to replace the PIT / RTC
|
||||
support of the X86 PC. It remains to be seen whether that will be the case, as
|
||||
@ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS.
|
||||
Detailed specification of the HPET is beyond the current scope of this
|
||||
document, as it is also very well documented elsewhere.
|
||||
|
||||
2.5) Offboard Timers
|
||||
2.5. Offboard Timers
|
||||
--------------------
|
||||
|
||||
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
|
||||
timing chips built into the cards which may have registers which are accessible
|
||||
@ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game. Such a
|
||||
timer device would require additional support to be virtualized properly and is
|
||||
not considered important at this time as no known operating system does this.
|
||||
|
||||
=========================================================================
|
||||
|
||||
3) TSC Hardware
|
||||
3. TSC Hardware
|
||||
===============
|
||||
|
||||
The TSC or time stamp counter is relatively simple in theory; it counts
|
||||
instruction cycles issued by the processor, which can be used as a measure of
|
||||
@ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant. Newer implementations
|
||||
promise to allow the TSC to additionally be scaled, but this hardware is not
|
||||
yet widely available.
|
||||
|
||||
3.1) TSC synchronization
|
||||
3.1. TSC synchronization
|
||||
------------------------
|
||||
|
||||
The TSC is a CPU-local clock in most implementations. This means, on SMP
|
||||
platforms, the TSCs of different CPUs may start at different times depending
|
||||
@ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all
|
||||
values are read from the same clock, which generally only is possible on single
|
||||
socket systems or those with special hardware support.
|
||||
|
||||
3.2) TSC and CPU hotplug
|
||||
3.2. TSC and CPU hotplug
|
||||
------------------------
|
||||
|
||||
As touched on already, CPUs which arrive later than the boot time of the system
|
||||
may not have a TSC value that is synchronized with the rest of the system.
|
||||
@ -367,7 +382,8 @@ a guarantee. This can have the effect of bringing a system from a state where
|
||||
TSC is synchronized back to a state where TSC synchronization flaws, however
|
||||
small, may be exposed to the OS and any virtualization environment.
|
||||
|
||||
3.3) TSC and multi-socket / NUMA
|
||||
3.3. TSC and multi-socket / NUMA
|
||||
--------------------------------
|
||||
|
||||
Multi-socket systems, especially large multi-socket systems are likely to have
|
||||
individual clocksources rather than a single, universally distributed clock.
|
||||
@ -385,7 +401,8 @@ standards for telecommunications and computer equipment.
|
||||
It is recommended not to trust the TSCs to remain synchronized on NUMA or
|
||||
multiple socket systems for these reasons.
|
||||
|
||||
3.4) TSC and C-states
|
||||
3.4. TSC and C-states
|
||||
---------------------
|
||||
|
||||
C-states, or idling states of the processor, especially C1E and deeper sleep
|
||||
states may be problematic for TSC as well. The TSC may stop advancing in such
|
||||
@ -396,7 +413,8 @@ based on CPU and chipset identifications.
|
||||
The TSC in such a case may be corrected by catching it up to a known external
|
||||
clocksource.
|
||||
|
||||
3.5) TSC frequency change / P-states
|
||||
3.5. TSC frequency change / P-states
|
||||
------------------------------------
|
||||
|
||||
To make things slightly more interesting, some CPUs may change frequency. They
|
||||
may or may not run the TSC at the same rate, and because the frequency change
|
||||
@ -416,14 +434,16 @@ other processors. In such cases, the TSC on halted CPUs could advance faster
|
||||
than that of non-halted processors. AMD Turion processors are known to have
|
||||
this problem.
|
||||
|
||||
3.6) TSC and STPCLK / T-states
|
||||
3.6. TSC and STPCLK / T-states
|
||||
------------------------------
|
||||
|
||||
External signals given to the processor may also have the effect of stopping
|
||||
the TSC. This is typically done for thermal emergency power control to prevent
|
||||
an overheating condition, and typically, there is no way to detect that this
|
||||
condition has happened.
|
||||
|
||||
3.7) TSC virtualization - VMX
|
||||
3.7. TSC virtualization - VMX
|
||||
-----------------------------
|
||||
|
||||
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
@ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
|
||||
field specified in the VMCS. Special instructions must be used to read and
|
||||
write the VMCS field.
|
||||
|
||||
3.8) TSC virtualization - SVM
|
||||
3.8. TSC virtualization - SVM
|
||||
-----------------------------
|
||||
|
||||
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
addition, SVM allows passing through the host TSC plus an additional offset
|
||||
field specified in the SVM control block.
|
||||
|
||||
3.9) TSC feature bits in Linux
|
||||
3.9. TSC feature bits in Linux
|
||||
------------------------------
|
||||
|
||||
In summary, there is no way to guarantee the TSC remains in perfect
|
||||
synchronization unless it is explicitly guaranteed by the architecture. Even
|
||||
@ -448,13 +470,16 @@ despite being locally consistent.
|
||||
The following feature bits are used by Linux to signal various TSC attributes,
|
||||
but they can only be taken to be meaningful for UP or single node systems.
|
||||
|
||||
X86_FEATURE_TSC : The TSC is available in hardware
|
||||
X86_FEATURE_RDTSCP : The RDTSCP instruction is available
|
||||
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
|
||||
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
|
||||
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
|
||||
========================= =======================================
|
||||
X86_FEATURE_TSC The TSC is available in hardware
|
||||
X86_FEATURE_RDTSCP The RDTSCP instruction is available
|
||||
X86_FEATURE_CONSTANT_TSC The TSC rate is unchanged with P-states
|
||||
X86_FEATURE_NONSTOP_TSC The TSC does not stop in C-states
|
||||
X86_FEATURE_TSC_RELIABLE TSC sync checks are skipped (VMware)
|
||||
========================= =======================================
|
||||
|
||||
4) Virtualization Problems
|
||||
4. Virtualization Problems
|
||||
==========================
|
||||
|
||||
Timekeeping is especially problematic for virtualization because a number of
|
||||
challenges arise. The most obvious problem is that time is now shared between
|
||||
@ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
|
||||
cause similar problems to virtualization makes it a good justification for
|
||||
solving many of these problems on bare metal.
|
||||
|
||||
4.1) Interrupt clocking
|
||||
4.1. Interrupt clocking
|
||||
-----------------------
|
||||
|
||||
One of the most immediate problems that occurs with legacy operating systems
|
||||
is that the system timekeeping routines are often designed to keep track of
|
||||
@ -502,7 +528,8 @@ thus requires interrupt slewing to keep proper time. It does use a low enough
|
||||
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
|
||||
practice.
|
||||
|
||||
4.2) TSC sampling and serialization
|
||||
4.2. TSC sampling and serialization
|
||||
-----------------------------------
|
||||
|
||||
As the highest precision time source available, the cycle counter of the CPU
|
||||
has aroused much interest from developers. As explained above, this timer has
|
||||
@ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of
|
||||
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
|
||||
system.
|
||||
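
As an illustrative sketch of such a serialized read on x86-64 (relying on
the RDTSCP support advertised by the X86_FEATURE_RDTSCP bit listed in
section 3.9), GCC/Clang inline assembly could be used as below; note that
this only orders the read on one CPU and does nothing about cross-CPU
synchronization::

	#include <stdint.h>

	static inline uint64_t rdtsc_serialized(void)
	{
		uint32_t lo, hi, aux;

		/* RDTSCP waits until previous instructions have executed */
		__asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(aux));
		/* keep later loads from being hoisted above the read */
		__asm__ __volatile__("lfence" ::: "memory");
		(void)aux;	/* IA32_TSC_AUX, unused here */
		return ((uint64_t)hi << 32) | lo;
	}
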
|
||||
4.3) Timespec aliasing
|
||||
4.3. Timespec aliasing
|
||||
----------------------
|
||||
|
||||
Additionally, this lack of serialization from the TSC poses another challenge
|
||||
when using results of the TSC when measured against another time source. As
|
||||
@ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock
|
||||
and any other values derived from TSC computation (such as TSC virtualization
|
||||
itself).
|
||||
|
||||
4.4) Migration
|
||||
4.4. Migration
|
||||
--------------
|
||||
|
||||
Migration of a virtual machine raises problems for timekeeping in two ways.
|
||||
First, the migration itself may take time, during which interrupts cannot be
|
||||
@ -566,7 +595,8 @@ always be caught up to the original rate. KVM clock avoids these problems by
|
||||
simply storing multipliers and offsets against the TSC for the guest to convert
|
||||
back into nanosecond resolution values.
|
||||
|
||||
4.5) Scheduling
|
||||
4.5. Scheduling
|
||||
---------------
|
||||
|
||||
Since scheduling may be based on precise timing and firing of interrupts, the
|
||||
scheduling algorithms of an operating system may be adversely affected by
|
||||
@ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a
|
||||
paravirtualized scheduler clock, which reveals the true amount of CPU time for
|
||||
which a virtual machine has been running.
|
||||
|
||||
4.6) Watchdogs
|
||||
4.6. Watchdogs
|
||||
--------------
|
||||
|
||||
Watchdog timers, such as the lock detector in Linux may fire accidentally when
|
||||
running under hardware virtualization due to timer interrupts being delayed or
|
||||
@ -587,7 +618,8 @@ misinterpretation of the passage of real time. Usually, these warnings are
|
||||
spurious and can be ignored, but in some circumstances it may be necessary to
|
||||
disable such detection.
|
||||
|
||||
4.7) Delays and precision timing
|
||||
4.7. Delays and precision timing
|
||||
--------------------------------
|
||||
|
||||
Precise timing and delays may not be possible in a virtualized system. This
|
||||
can happen if the system is controlling physical hardware, or issues delays to
|
||||
@ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a
|
||||
significant issue. In many cases these delays may be eliminated through
|
||||
configuration or paravirtualization.
|
||||
|
||||
4.8) Covert channels and leaks
|
||||
4.8. Covert channels and leaks
|
||||
------------------------------
|
||||
|
||||
In addition to the above problems, time information will inevitably leak to the
|
||||
guest about the host in anything but a perfect implementation of virtualized
|
File diff suppressed because it is too large
28	MAINTAINERS
@ -2796,11 +2796,11 @@ F: drivers/block/aoe/
|
||||
|
||||
ATHEROS 71XX/9XXX GPIO DRIVER
|
||||
M: Alban Bedel <albeu@free.fr>
|
||||
S: Maintained
|
||||
W: https://github.com/AlbanBedel/linux
|
||||
T: git git://github.com/AlbanBedel/linux
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-ath79.c
|
||||
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
|
||||
F: drivers/gpio/gpio-ath79.c
|
||||
|
||||
ATHEROS 71XX/9XXX USB PHY DRIVER
|
||||
M: Alban Bedel <albeu@free.fr>
|
||||
@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER
|
||||
M: Gregory Fong <gregory.0xf0@gmail.com>
|
||||
L: bcm-kernel-feedback-list@broadcom.com
|
||||
S: Supported
|
||||
F: drivers/gpio/gpio-brcmstb.c
|
||||
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
|
||||
F: drivers/gpio/gpio-brcmstb.c
|
||||
|
||||
BROADCOM BRCMSTB I2C DRIVER
|
||||
M: Kamal Dasu <kdasu.kdev@gmail.com>
|
||||
@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER
|
||||
M: Ray Jui <rjui@broadcom.com>
|
||||
L: bcm-kernel-feedback-list@broadcom.com
|
||||
S: Supported
|
||||
F: drivers/gpio/gpio-bcm-kona.c
|
||||
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
|
||||
F: drivers/gpio/gpio-bcm-kona.c
|
||||
|
||||
BROADCOM NETXTREME-E ROCE DRIVER
|
||||
M: Selvin Xavier <selvin.xavier@broadcom.com>
|
||||
@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c
|
||||
|
||||
BT8XXGPIO DRIVER
|
||||
M: Michael Buesch <m@bues.ch>
|
||||
W: http://bu3sch.de/btgpio.php
|
||||
S: Maintained
|
||||
W: http://bu3sch.de/btgpio.php
|
||||
F: drivers/gpio/gpio-bt8xx.c
|
||||
|
||||
BTRFS FILE SYSTEM
|
||||
@ -7143,18 +7143,18 @@ GPIO SUBSYSTEM
|
||||
M: Linus Walleij <linus.walleij@linaro.org>
|
||||
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||
F: Documentation/ABI/obsolete/sysfs-gpio
|
||||
F: Documentation/ABI/testing/gpio-cdev
|
||||
F: Documentation/admin-guide/gpio/
|
||||
F: Documentation/devicetree/bindings/gpio/
|
||||
F: Documentation/driver-api/gpio/
|
||||
F: Documentation/admin-guide/gpio/
|
||||
F: Documentation/ABI/testing/gpio-cdev
|
||||
F: Documentation/ABI/obsolete/sysfs-gpio
|
||||
F: drivers/gpio/
|
||||
F: include/asm-generic/gpio.h
|
||||
F: include/linux/gpio/
|
||||
F: include/linux/gpio.h
|
||||
F: include/linux/of_gpio.h
|
||||
F: include/asm-generic/gpio.h
|
||||
F: include/uapi/linux/gpio.h
|
||||
F: tools/gpio/
|
||||
|
||||
@ -8055,8 +8055,8 @@ F: drivers/scsi/ips.*
|
||||
ICH LPC AND GPIO DRIVER
|
||||
M: Peter Tyser <ptyser@xes-inc.com>
|
||||
S: Maintained
|
||||
F: drivers/mfd/lpc_ich.c
|
||||
F: drivers/gpio/gpio-ich.c
|
||||
F: drivers/mfd/lpc_ich.c
|
||||
|
||||
ICY I2C DRIVER
|
||||
M: Max Staudt <max@enpas.org>
|
||||
@ -16075,8 +16075,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
|
||||
SYNOPSYS CREG GPIO DRIVER
|
||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-creg-snps.c
|
||||
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
|
||||
F: drivers/gpio/gpio-creg-snps.c
|
||||
|
||||
SYNOPSYS DESIGNWARE 8250 UART DRIVER
|
||||
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
||||
@ -16087,8 +16087,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER
|
||||
M: Hoan Tran <hoan@os.amperecomputing.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-dwapb.c
|
||||
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
|
||||
F: drivers/gpio/gpio-dwapb.c
|
||||
|
||||
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
|
||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||
@ -18414,8 +18414,8 @@ M: Nandor Han <nandor.han@ge.com>
|
||||
M: Semi Malinen <semi.malinen@ge.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-xra1403.c
|
||||
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
|
||||
F: drivers/gpio/gpio-xra1403.c
|
||||
|
||||
XTENSA XTFPGA PLATFORM SUPPORT
|
||||
M: Max Filippov <jcmvbkbc@gmail.com>
|
||||
|
2	Makefile
@ -2,7 +2,7 @@
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 6
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc1
|
||||
EXTRAVERSION = -rc2
|
||||
NAME = Kleptomaniac Octopus
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
|
||||
{
|
||||
unsigned long replaced;
|
||||
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
|
||||
old = __opcode_to_mem_thumb32(old);
|
||||
new = __opcode_to_mem_thumb32(new);
|
||||
} else {
|
||||
else
|
||||
old = __opcode_to_mem_arm(old);
|
||||
new = __opcode_to_mem_arm(new);
|
||||
}
|
||||
|
||||
if (validate) {
|
||||
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
|
||||
|
@ -16,10 +16,10 @@ struct patch {
|
||||
unsigned int insn;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
static DEFINE_RAW_SPINLOCK(patch_lock);
|
||||
|
||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
__acquires(&patch_lock)
|
||||
{
|
||||
unsigned int uintaddr = (uintptr_t) addr;
|
||||
bool module = !core_kernel_text(uintaddr);
|
||||
@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
|
||||
if (flags)
|
||||
raw_spin_lock_irqsave(&patch_lock, *flags);
|
||||
else
|
||||
__acquire(&patch_lock);
|
||||
|
||||
set_fixmap(fixmap, page_to_phys(page));
|
||||
|
||||
@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
}
|
||||
|
||||
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
|
||||
__releases(&patch_lock)
|
||||
{
|
||||
clear_fixmap(fixmap);
|
||||
|
||||
if (flags)
|
||||
raw_spin_unlock_irqrestore(&patch_lock, *flags);
|
||||
else
|
||||
__release(&patch_lock);
|
||||
}
|
||||
#else
|
||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
|
||||
#endif
|
||||
|
||||
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
{
|
||||
@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
|
||||
if (remap)
|
||||
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
|
||||
else
|
||||
__acquire(&patch_lock);
|
||||
|
||||
if (thumb2 && __opcode_is_thumb16(insn)) {
|
||||
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
|
||||
@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
if (waddr != addr) {
|
||||
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
|
||||
patch_unmap(FIX_TEXT_POKE0, &flags);
|
||||
} else
|
||||
__release(&patch_lock);
|
||||
}
|
||||
|
||||
flush_icache_range((uintptr_t)(addr),
|
||||
(uintptr_t)(addr) + size);
|
||||
|
@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr)
|
||||
|
||||
asmlinkage void enter_from_user_mode(void);
|
||||
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
||||
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
||||
void do_undefinstr(struct pt_regs *regs);
|
||||
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
|
||||
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
|
||||
@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
|
||||
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
|
||||
void do_el0_svc(struct pt_regs *regs);
|
||||
void do_el0_svc_compat(struct pt_regs *regs);
|
||||
void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
|
||||
struct pt_regs *regs);
|
||||
|
||||
#endif /* __ASM_EXCEPTION_H */
|
||||
|
@@ -18,6 +18,10 @@
* See:
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
*/
#define vcpu_is_preempted(cpu) false
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
return false;
}

#endif /* __ASM_SPINLOCK_H */
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/types.h>

#include <asm/archrandom.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
|
||||
if (unlikely(next->flags & PF_KTHREAD))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If all CPUs implement the SSBS extension, then we just need to
|
||||
* context-switch the PSTATE field.
|
||||
*/
|
||||
if (cpu_have_feature(cpu_feature(SSBS)))
|
||||
return;
|
||||
|
||||
/* If the mitigation is enabled, then we leave SSBS clear. */
|
||||
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
|
||||
test_tsk_thread_flag(next, TIF_SSBD))
|
||||
@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void)
|
||||
* only prevents the tagged address ABI enabling via prctl() and does not
|
||||
* disable it for tasks that already opted in to the relaxed ABI.
|
||||
*/
|
||||
static int zero;
|
||||
static int one = 1;
|
||||
|
||||
static struct ctl_table tagged_addr_sysctl_table[] = {
|
||||
{
|
||||
@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
|
||||
.data = &tagged_addr_disabled,
|
||||
.maxlen = sizeof(int),
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include <linux/irq.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/clk-provider.h>
|
||||
#include <linux/of_clk.h>
|
||||
#include <linux/acpi.h>
|
||||
|
||||
#include <clocksource/arm_arch_timer.h>
|
||||
|
@@ -15,7 +15,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;

if (uv_call(0, (uint64_t)&uvcb))
/* rc==0x100 means that there is additional data we do not process */
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;

if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
@@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk)

static inline unsigned long long get_tod_clock(void)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
char clk[STORE_CLOCK_EXT_SIZE];

get_tod_clock_ext(clk);
return *((unsigned long long *)&clk[1]);
@@ -250,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
@@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void)
break;

case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@ -40,17 +40,18 @@
|
||||
* Model specific counters:
|
||||
* MSR_CORE_C1_RES: CORE C1 Residency Counter
|
||||
* perf code: 0x00
|
||||
* Available model: SLM,AMT,GLM,CNL
|
||||
* Available model: SLM,AMT,GLM,CNL,TNT
|
||||
* Scope: Core (each processor core has a MSR)
|
||||
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
|
||||
* perf code: 0x01
|
||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
|
||||
* CNL,KBL,CML
|
||||
* CNL,KBL,CML,TNT
|
||||
* Scope: Core
|
||||
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
|
||||
* perf code: 0x02
|
||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
|
||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
|
||||
* TNT
|
||||
* Scope: Core
|
||||
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
|
||||
* perf code: 0x03
|
||||
@ -60,17 +61,18 @@
|
||||
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
|
||||
* perf code: 0x00
|
||||
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
|
||||
* KBL,CML,ICL,TGL
|
||||
* KBL,CML,ICL,TGL,TNT
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
|
||||
* perf code: 0x01
|
||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
|
||||
* GLM,CNL,KBL,CML,ICL,TGL
|
||||
* GLM,CNL,KBL,CML,ICL,TGL,TNT
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
|
||||
* perf code: 0x02
|
||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
|
||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
|
||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
|
||||
* TNT
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
|
||||
* perf code: 0x03
|
||||
@ -87,7 +89,8 @@
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
|
||||
* perf code: 0x06
|
||||
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
|
||||
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
|
||||
* TNT
|
||||
* Scope: Package (physical package)
|
||||
*
|
||||
*/
|
||||
@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
|
||||
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
|
||||
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates),
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates),
|
||||
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
|
||||
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
|
||||
|
@@ -1714,6 +1714,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
old = ((s64)(prev_raw_count << shift) >> shift);
local64_add(new - old + count * period, &event->count);

local64_set(&hwc->period_left, -new);

perf_event_update_userpage(event);

return 0;
@@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data)

case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:

case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:

case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
@ -781,9 +781,19 @@ struct kvm_vcpu_arch {
|
||||
u64 msr_kvm_poll_control;
|
||||
|
||||
/*
|
||||
* Indicate whether the access faults on its page table in guest
|
||||
* which is set when fix page fault and used to detect unhandeable
|
||||
* instruction.
|
||||
* Indicates the guest is trying to write a gfn that contains one or
|
||||
* more of the PTEs used to translate the write itself, i.e. the access
|
||||
* is changing its own translation in the guest page tables. KVM exits
|
||||
* to userspace if emulation of the faulting instruction fails and this
|
||||
* flag is set, as KVM cannot make forward progress.
|
||||
*
|
||||
* If emulation fails for a write to guest page tables, KVM unprotects
|
||||
* (zaps) the shadow page for the target gfn and resumes the guest to
|
||||
* retry the non-emulatable instruction (on hardware). Unprotecting the
|
||||
* gfn doesn't allow forward progress for a self-changing access because
|
||||
* doing so also zaps the translation for the gfn, i.e. retrying the
|
||||
* instruction will hit a !PRESENT fault, which results in a new shadow
|
||||
* page and sends KVM back to square one.
|
||||
*/
|
||||
bool write_fault_to_shadow_pgtable;
|
||||
|
||||
|
@ -1080,9 +1080,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
result = 1;
|
||||
/* assumes that there are only KVM_APIC_INIT/SIPI */
|
||||
apic->pending_events = (1UL << KVM_APIC_INIT);
|
||||
/* make sure pending_events is visible before sending
|
||||
* the request */
|
||||
smp_wmb();
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
@ -102,6 +102,19 @@ static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
|
||||
kvm_get_active_pcid(vcpu));
|
||||
}
|
||||
|
||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
bool prefault);
|
||||
|
||||
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
u32 err, bool prefault)
|
||||
{
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault))
|
||||
return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault);
|
||||
#endif
|
||||
return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault);
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently, we have two sorts of write-protection, a) the first one
|
||||
* write-protects guest page to sync the guest modification, b) another one is
|
||||
|
@ -4219,8 +4219,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
|
||||
|
||||
static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
bool prefault)
|
||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
bool prefault)
|
||||
{
|
||||
int max_level;
|
||||
|
||||
@ -4925,7 +4925,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
context->mmu_role.as_u64 = new_role.as_u64;
|
||||
context->page_fault = tdp_page_fault;
|
||||
context->page_fault = kvm_tdp_page_fault;
|
||||
context->sync_page = nonpaging_sync_page;
|
||||
context->invlpg = nonpaging_invlpg;
|
||||
context->update_pte = nonpaging_update_pte;
|
||||
@ -5436,9 +5436,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
|
||||
}
|
||||
|
||||
if (r == RET_PF_INVALID) {
|
||||
r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
|
||||
lower_32_bits(error_code),
|
||||
false);
|
||||
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
|
||||
lower_32_bits(error_code), false);
|
||||
WARN_ON(r == RET_PF_INVALID);
|
||||
}
|
||||
|
||||
|
@@ -33,7 +33,7 @@
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#define CMPXCHG cmpxchg
#else
#define CMPXCHG cmpxchg64
@ -2175,7 +2175,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
u32 dummy;
|
||||
u32 eax = 1;
|
||||
|
||||
vcpu->arch.microcode_version = 0x01000065;
|
||||
svm->spec_ctrl = 0;
|
||||
svm->virt_spec_ctrl = 0;
|
||||
|
||||
@ -2266,6 +2265,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
|
||||
init_vmcb(svm);
|
||||
|
||||
svm_init_osvw(vcpu);
|
||||
vcpu->arch.microcode_version = 0x01000065;
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -544,7 +544,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
|
||||
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
|
||||
{
|
||||
int msr;
|
||||
|
||||
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
|
||||
@ -1981,7 +1982,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean fields data can't de used on VMLAUNCH and when we switch
|
||||
* Clean fields data can't be used on VMLAUNCH and when we switch
|
||||
* between different L2 guests as KVM keeps a single VMCS12 per L1.
|
||||
*/
|
||||
if (from_launch || evmcs_gpa_changed)
|
||||
@ -3575,6 +3576,33 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if a debug trap is pending delivery.
|
||||
*
|
||||
* In KVM, debug traps bear an exception payload. As such, the class of a #DB
|
||||
* exception may be inferred from the presence of an exception payload.
|
||||
*/
|
||||
static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.exception.pending &&
|
||||
vcpu->arch.exception.nr == DB_VECTOR &&
|
||||
vcpu->arch.exception.payload;
|
||||
}
|
||||
|
||||
/*
|
||||
* Certain VM-exits set the 'pending debug exceptions' field to indicate a
|
||||
* recognized #DB (data or single-step) that has yet to be delivered. Since KVM
|
||||
* represents these debug traps with a payload that is said to be compatible
|
||||
* with the 'pending debug exceptions' field, write the payload to the VMCS
|
||||
* field if a VM-exit is delivered before the debug trap.
|
||||
*/
|
||||
static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vmx_pending_dbg_trap(vcpu))
|
||||
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
|
||||
vcpu->arch.exception.payload);
|
||||
}
|
||||
|
||||
static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -3587,6 +3615,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
|
||||
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
|
||||
if (block_nested_events)
|
||||
return -EBUSY;
|
||||
nested_vmx_update_pending_dbg(vcpu);
|
||||
clear_bit(KVM_APIC_INIT, &apic->pending_events);
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
|
||||
return 0;
|
||||
|
@ -2947,6 +2947,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
|
||||
static int get_ept_level(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Nested EPT currently only supports 4-level walks. */
|
||||
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
|
||||
return 4;
|
||||
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
|
||||
return 5;
|
||||
return 4;
|
||||
@ -4238,7 +4241,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
|
||||
vmx->msr_ia32_umwait_control = 0;
|
||||
|
||||
vcpu->arch.microcode_version = 0x100000000ULL;
|
||||
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
||||
vmx->hv_deadline_tsc = -1;
|
||||
kvm_set_cr8(vcpu, 0);
|
||||
@ -6763,6 +6765,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
|
||||
vmx->nested.posted_intr_nv = -1;
|
||||
vmx->nested.current_vmptr = -1ull;
|
||||
|
||||
vcpu->arch.microcode_version = 0x100000000ULL;
|
||||
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
|
||||
|
||||
/*
|
||||
|
@ -438,6 +438,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
|
||||
* for #DB exceptions under VMX.
|
||||
*/
|
||||
vcpu->arch.dr6 ^= payload & DR6_RTM;
|
||||
|
||||
/*
|
||||
* The #DB payload is defined as compatible with the 'pending
|
||||
* debug exceptions' field under VMX, not DR6. While bit 12 is
|
||||
* defined in the 'pending debug exceptions' field (enabled
|
||||
* breakpoint), it is reserved and must be zero in DR6.
|
||||
*/
|
||||
vcpu->arch.dr6 &= ~BIT(12);
|
||||
break;
|
||||
case PF_VECTOR:
|
||||
vcpu->arch.cr2 = payload;
|
||||
@ -490,19 +498,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.exception.error_code = error_code;
|
||||
vcpu->arch.exception.has_payload = has_payload;
|
||||
vcpu->arch.exception.payload = payload;
|
||||
/*
|
||||
* In guest mode, payload delivery should be deferred,
|
||||
* so that the L1 hypervisor can intercept #PF before
|
||||
* CR2 is modified (or intercept #DB before DR6 is
|
||||
* modified under nVMX). However, for ABI
|
||||
* compatibility with KVM_GET_VCPU_EVENTS and
|
||||
* KVM_SET_VCPU_EVENTS, we can't delay payload
|
||||
* delivery unless userspace has enabled this
|
||||
* functionality via the per-VM capability,
|
||||
* KVM_CAP_EXCEPTION_PAYLOAD.
|
||||
*/
|
||||
if (!vcpu->kvm->arch.exception_payload_enabled ||
|
||||
!is_guest_mode(vcpu))
|
||||
if (!is_guest_mode(vcpu))
|
||||
kvm_deliver_exception_payload(vcpu);
|
||||
return;
|
||||
}
|
||||
@ -2448,7 +2444,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
|
||||
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
|
||||
vcpu->last_guest_tsc = tsc_timestamp;
|
||||
WARN_ON(vcpu->hv_clock.system_time < 0);
|
||||
WARN_ON((s64)vcpu->hv_clock.system_time < 0);
|
||||
|
||||
/* If the host uses TSC clocksource, then it is stable */
|
||||
pvclock_flags = 0;
|
||||
@ -3795,6 +3791,21 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
process_nmi(vcpu);
|
||||
|
||||
/*
|
||||
* In guest mode, payload delivery should be deferred,
|
||||
* so that the L1 hypervisor can intercept #PF before
|
||||
* CR2 is modified (or intercept #DB before DR6 is
|
||||
* modified under nVMX). Unless the per-VM capability,
|
||||
* KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
|
||||
* an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
|
||||
* opportunistically defer the exception payload, deliver it if the
|
||||
* capability hasn't been requested before processing a
|
||||
* KVM_GET_VCPU_EVENTS.
|
||||
*/
|
||||
if (!vcpu->kvm->arch.exception_payload_enabled &&
|
||||
vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
|
||||
kvm_deliver_exception_payload(vcpu);
|
||||
|
||||
/*
|
||||
* The API doesn't provide the instruction length for software
|
||||
* exceptions, so don't report them. As long as the guest RIP
|
||||
@ -8942,7 +8953,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
|
||||
|
||||
kvm_rip_write(vcpu, ctxt->eip);
|
||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_task_switch);
|
||||
@ -10182,7 +10192,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
|
||||
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
|
||||
return;
|
||||
|
||||
vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
|
||||
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
|
||||
}
|
||||
|
||||
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
|
||||
|
@@ -136,8 +136,6 @@ config CRYPTO_USER
Userspace configuration for cryptographic instantiations such as
cbc(aes).

if CRYPTO_MANAGER2

config CRYPTO_MANAGER_DISABLE_TESTS
bool "Disable run-time self tests"
default y
@@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.

endif # if CRYPTO_MANAGER2

config CRYPTO_GF128MUL
tristate
@ -4436,6 +4436,15 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.cipher = __VECS(tf_cbc_tv_template)
|
||||
},
|
||||
}, {
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
|
||||
.alg = "cbc-paes-s390",
|
||||
.fips_allowed = 1,
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_cbc_tv_template)
|
||||
}
|
||||
}, {
|
||||
#endif
|
||||
.alg = "cbcmac(aes)",
|
||||
.fips_allowed = 1,
|
||||
.test = alg_test_hash,
|
||||
@ -4587,6 +4596,15 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.cipher = __VECS(tf_ctr_tv_template)
|
||||
}
|
||||
}, {
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
|
||||
.alg = "ctr-paes-s390",
|
||||
.fips_allowed = 1,
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_ctr_tv_template)
|
||||
}
|
||||
}, {
|
||||
#endif
|
||||
.alg = "cts(cbc(aes))",
|
||||
.test = alg_test_skcipher,
|
||||
.fips_allowed = 1,
|
||||
@ -4879,6 +4897,15 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.cipher = __VECS(xtea_tv_template)
|
||||
}
|
||||
}, {
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
|
||||
.alg = "ecb-paes-s390",
|
||||
.fips_allowed = 1,
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_tv_template)
|
||||
}
|
||||
}, {
|
||||
#endif
|
||||
.alg = "ecdh",
|
||||
.test = alg_test_kpp,
|
||||
.fips_allowed = 1,
|
||||
@ -5465,6 +5492,15 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.cipher = __VECS(tf_xts_tv_template)
|
||||
}
|
||||
}, {
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
|
||||
.alg = "xts-paes-s390",
|
||||
.fips_allowed = 1,
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_xts_tv_template)
|
||||
}
|
||||
}, {
|
||||
#endif
|
||||
.alg = "xts4096(paes)",
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
|
@ -101,6 +101,8 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void);
|
||||
|
||||
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
|
||||
|
||||
u8 acpi_hw_check_all_gpes(void);
|
||||
|
||||
acpi_status
|
||||
acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
|
||||
struct acpi_gpe_block_info *gpe_block,
|
||||
|
@ -795,6 +795,38 @@ acpi_status acpi_enable_all_wakeup_gpes(void)
|
||||
|
||||
ACPI_EXPORT_SYMBOL(acpi_enable_all_wakeup_gpes)
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* FUNCTION: acpi_any_gpe_status_set
|
||||
*
|
||||
* PARAMETERS: None
|
||||
*
|
||||
* RETURN: Whether or not the status bit is set for any GPE
|
||||
*
|
||||
* DESCRIPTION: Check the status bits of all enabled GPEs and return TRUE if any
|
||||
* of them is set or FALSE otherwise.
|
||||
*
|
||||
******************************************************************************/
|
||||
u32 acpi_any_gpe_status_set(void)
|
||||
{
|
||||
acpi_status status;
|
||||
u8 ret;
|
||||
|
||||
ACPI_FUNCTION_TRACE(acpi_any_gpe_status_set);
|
||||
|
||||
status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
ret = acpi_hw_check_all_gpes();
|
||||
(void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
ACPI_EXPORT_SYMBOL(acpi_any_gpe_status_set)
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
* FUNCTION: acpi_install_gpe_block
|
||||
|
@ -444,6 +444,53 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
|
||||
return (AE_OK);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* FUNCTION: acpi_hw_get_gpe_block_status
|
||||
*
|
||||
* PARAMETERS: gpe_xrupt_info - GPE Interrupt info
|
||||
* gpe_block - Gpe Block info
|
||||
*
|
||||
* RETURN: Success
|
||||
*
|
||||
* DESCRIPTION: Produce a combined GPE status bits mask for the given block.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
static acpi_status
|
||||
acpi_hw_get_gpe_block_status(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
|
||||
struct acpi_gpe_block_info *gpe_block,
|
||||
void *ret_ptr)
|
||||
{
|
||||
struct acpi_gpe_register_info *gpe_register_info;
|
||||
u64 in_enable, in_status;
|
||||
acpi_status status;
|
||||
u8 *ret = ret_ptr;
|
||||
u32 i;
|
||||
|
||||
/* Examine each GPE Register within the block */
|
||||
|
||||
for (i = 0; i < gpe_block->register_count; i++) {
|
||||
gpe_register_info = &gpe_block->register_info[i];
|
||||
|
||||
status = acpi_hw_read(&in_enable,
|
||||
&gpe_register_info->enable_address);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
status = acpi_hw_read(&in_status,
|
||||
&gpe_register_info->status_address);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
*ret |= in_enable & in_status;
|
||||
}
|
||||
|
||||
return (AE_OK);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* FUNCTION: acpi_hw_disable_all_gpes
|
||||
@ -510,4 +557,28 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void)
|
||||
return_ACPI_STATUS(status);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* FUNCTION: acpi_hw_check_all_gpes
|
||||
*
|
||||
* PARAMETERS: None
|
||||
*
|
||||
* RETURN: Combined status of all GPEs
|
||||
*
|
||||
* DESCRIPTION: Check all enabled GPEs in all GPE blocks and return TRUE if the
|
||||
* status bit is set for at least one of them of FALSE otherwise.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
u8 acpi_hw_check_all_gpes(void)
|
||||
{
|
||||
u8 ret = 0;
|
||||
|
||||
ACPI_FUNCTION_TRACE(acpi_hw_check_all_gpes);
|
||||
|
||||
(void)acpi_ev_walk_gpe_list(acpi_hw_get_gpe_block_status, &ret);
|
||||
|
||||
return (ret != 0);
|
||||
}
|
||||
|
||||
#endif /* !ACPI_REDUCED_HARDWARE */
|
||||
|
@ -179,6 +179,7 @@ EXPORT_SYMBOL(first_ec);
|
||||
|
||||
static struct acpi_ec *boot_ec;
|
||||
static bool boot_ec_is_ecdt = false;
|
||||
static struct workqueue_struct *ec_wq;
|
||||
static struct workqueue_struct *ec_query_wq;
|
||||
|
||||
static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
|
||||
@ -469,7 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec)
|
||||
ec_dbg_evt("Command(%s) submitted/blocked",
|
||||
acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
|
||||
ec->nr_pending_queries++;
|
||||
schedule_work(&ec->work);
|
||||
queue_work(ec_wq, &ec->work);
|
||||
}
|
||||
}
|
||||
|
||||
@ -535,7 +536,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
static void __acpi_ec_flush_work(void)
|
||||
{
|
||||
flush_scheduled_work(); /* flush ec->work */
|
||||
drain_workqueue(ec_wq); /* flush ec->work */
|
||||
flush_workqueue(ec_query_wq); /* flush queries */
|
||||
}
|
||||
|
||||
@ -556,8 +557,8 @@ static void acpi_ec_disable_event(struct acpi_ec *ec)
|
||||
|
||||
void acpi_ec_flush_work(void)
|
||||
{
|
||||
/* Without ec_query_wq there is nothing to flush. */
|
||||
if (!ec_query_wq)
|
||||
/* Without ec_wq there is nothing to flush. */
|
||||
if (!ec_wq)
|
||||
return;
|
||||
|
||||
__acpi_ec_flush_work();
|
||||
@ -2107,25 +2108,33 @@ static struct acpi_driver acpi_ec_driver = {
|
||||
.drv.pm = &acpi_ec_pm,
|
||||
};
|
||||
|
||||
static inline int acpi_ec_query_init(void)
|
||||
static void acpi_ec_destroy_workqueues(void)
|
||||
{
|
||||
if (!ec_query_wq) {
|
||||
ec_query_wq = alloc_workqueue("kec_query", 0,
|
||||
ec_max_queries);
|
||||
if (!ec_query_wq)
|
||||
return -ENODEV;
|
||||
if (ec_wq) {
|
||||
destroy_workqueue(ec_wq);
|
||||
ec_wq = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void acpi_ec_query_exit(void)
|
||||
{
|
||||
if (ec_query_wq) {
|
||||
destroy_workqueue(ec_query_wq);
|
||||
ec_query_wq = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int acpi_ec_init_workqueues(void)
|
||||
{
|
||||
if (!ec_wq)
|
||||
ec_wq = alloc_ordered_workqueue("kec", 0);
|
||||
|
||||
if (!ec_query_wq)
|
||||
ec_query_wq = alloc_workqueue("kec_query", 0, ec_max_queries);
|
||||
|
||||
if (!ec_wq || !ec_query_wq) {
|
||||
acpi_ec_destroy_workqueues();
|
||||
return -ENODEV;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct dmi_system_id acpi_ec_no_wakeup[] = {
|
||||
{
|
||||
.ident = "Thinkpad X1 Carbon 6th",
|
||||
@ -2156,8 +2165,7 @@ int __init acpi_ec_init(void)
|
||||
int result;
|
||||
int ecdt_fail, dsdt_fail;
|
||||
|
||||
/* register workqueue for _Qxx evaluations */
|
||||
result = acpi_ec_query_init();
|
||||
result = acpi_ec_init_workqueues();
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
@ -2188,6 +2196,6 @@ static void __exit acpi_ec_exit(void)
|
||||
{
|
||||
|
||||
acpi_bus_unregister_driver(&acpi_ec_driver);
|
||||
acpi_ec_query_exit();
|
||||
acpi_ec_destroy_workqueues();
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
@ -990,21 +990,34 @@ static void acpi_s2idle_sync(void)
|
||||
acpi_os_wait_events_complete(); /* synchronize Notify handling */
|
||||
}
|
||||
|
||||
static void acpi_s2idle_wake(void)
|
||||
static bool acpi_s2idle_wake(void)
|
||||
{
|
||||
/*
|
||||
* If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
|
||||
* not triggered while suspended, so bail out.
|
||||
*/
|
||||
if (!acpi_sci_irq_valid() ||
|
||||
irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
|
||||
return;
|
||||
if (!acpi_sci_irq_valid())
|
||||
return pm_wakeup_pending();
|
||||
|
||||
while (pm_wakeup_pending()) {
|
||||
/*
|
||||
* If IRQD_WAKEUP_ARMED is set for the SCI at this point, the
|
||||
* SCI has not triggered while suspended, so bail out (the
|
||||
* wakeup is pending anyway and the SCI is not the source of
|
||||
* it).
|
||||
*/
|
||||
if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If there are no EC events to process and at least one of the
|
||||
* other enabled GPEs is active, the wakeup is regarded as a
|
||||
* genuine one.
|
||||
*
|
||||
* Note that the checks below must be carried out in this order
|
||||
* to avoid returning prematurely due to a change of the EC GPE
|
||||
* status bit from unset to set between the checks with the
|
||||
* status bits of all the other GPEs unset.
|
||||
*/
|
||||
if (acpi_any_gpe_status_set() && !acpi_ec_dispatch_gpe())
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If there are EC events to process, the wakeup may be a spurious one
|
||||
* coming from the EC.
|
||||
*/
|
||||
if (acpi_ec_dispatch_gpe()) {
|
||||
/*
|
||||
* Cancel the wakeup and process all pending events in case
|
||||
* there are any wakeup ones in there.
|
||||
@ -1017,8 +1030,19 @@ static void acpi_s2idle_wake(void)
|
||||
|
||||
acpi_s2idle_sync();
|
||||
|
||||
/*
|
||||
* The SCI is in the "suspended" state now and it cannot produce
|
||||
* new wakeup events till the rearming below, so if any of them
|
||||
* are pending here, they must be resulting from the processing
|
||||
* of EC events above or coming from somewhere else.
|
||||
*/
|
||||
if (pm_wakeup_pending())
|
||||
return true;
|
||||
|
||||
rearm_wake_irq(acpi_sci_irq);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void acpi_s2idle_restore_early(void)
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#define MAX_MSG_LEN 128
|
||||
#define MAX_MSG_LEN 240
|
||||
#define IPMB_REQUEST_LEN_MIN 7
|
||||
#define NETFN_RSP_BIT_MASK 0x4
|
||||
#define REQUEST_QUEUE_MAX_LEN 256
|
||||
@ -63,6 +63,7 @@ struct ipmb_dev {
|
||||
spinlock_t lock;
|
||||
wait_queue_head_t wait_queue;
|
||||
struct mutex file_mutex;
|
||||
bool is_i2c_protocol;
|
||||
};
|
||||
|
||||
static inline struct ipmb_dev *to_ipmb_dev(struct file *file)
|
||||
@ -112,6 +113,25 @@ static ssize_t ipmb_read(struct file *file, char __user *buf, size_t count,
|
||||
return ret < 0 ? ret : count;
|
||||
}
|
||||
|
||||
static int ipmb_i2c_write(struct i2c_client *client, u8 *msg, u8 addr)
|
||||
{
|
||||
struct i2c_msg i2c_msg;
|
||||
|
||||
/*
|
||||
* subtract 1 byte (rq_sa) from the length of the msg passed to
|
||||
* raw i2c_transfer
|
||||
*/
|
||||
i2c_msg.len = msg[IPMB_MSG_LEN_IDX] - 1;
|
||||
|
||||
/* Assign message to buffer except first 2 bytes (length and address) */
|
||||
i2c_msg.buf = msg + 2;
|
||||
|
||||
i2c_msg.addr = addr;
|
||||
i2c_msg.flags = client->flags & I2C_CLIENT_PEC;
|
||||
|
||||
return i2c_transfer(client->adapter, &i2c_msg, 1);
|
||||
}
|
||||
|
||||
static ssize_t ipmb_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
@ -133,6 +153,12 @@ static ssize_t ipmb_write(struct file *file, const char __user *buf,
|
||||
rq_sa = GET_7BIT_ADDR(msg[RQ_SA_8BIT_IDX]);
|
||||
netf_rq_lun = msg[NETFN_LUN_IDX];
|
||||
|
||||
/* Check i2c block transfer vs smbus */
|
||||
if (ipmb_dev->is_i2c_protocol) {
|
||||
ret = ipmb_i2c_write(ipmb_dev->client, msg, rq_sa);
|
||||
return (ret == 1) ? count : ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* subtract rq_sa and netf_rq_lun from the length of the msg passed to
|
||||
* i2c_smbus_xfer
|
||||
@ -253,7 +279,7 @@ static int ipmb_slave_cb(struct i2c_client *client,
|
||||
break;
|
||||
|
||||
case I2C_SLAVE_WRITE_RECEIVED:
|
||||
if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg))
|
||||
if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg) - 1)
|
||||
break;
|
||||
|
||||
buf[++ipmb_dev->msg_idx] = *val;
|
||||
@ -302,6 +328,9 @@ static int ipmb_probe(struct i2c_client *client,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ipmb_dev->is_i2c_protocol
|
||||
= device_property_read_bool(&client->dev, "i2c-protocol");
|
||||
|
||||
ipmb_dev->client = client;
|
||||
i2c_set_clientdata(client, ipmb_dev);
|
||||
ret = i2c_slave_register(client, ipmb_slave_cb);
|
||||
|
@ -775,10 +775,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
|
||||
flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
|
||||
msg = ssif_info->curr_msg;
|
||||
if (msg) {
|
||||
if (data) {
|
||||
if (len > IPMI_MAX_MSG_LENGTH)
|
||||
len = IPMI_MAX_MSG_LENGTH;
|
||||
memcpy(msg->rsp, data, len);
|
||||
} else {
|
||||
len = 0;
|
||||
}
|
||||
msg->rsp_size = len;
|
||||
if (msg->rsp_size > IPMI_MAX_MSG_LENGTH)
|
||||
msg->rsp_size = IPMI_MAX_MSG_LENGTH;
|
||||
memcpy(msg->rsp, data, msg->rsp_size);
|
||||
ssif_info->curr_msg = NULL;
|
||||
}
|
||||
|
||||
|
@@ -105,6 +105,8 @@ bool have_governor_per_policy(void)
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);

static struct kobject *cpufreq_global_kobject;

struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
if (have_governor_per_policy())
@@ -2745,9 +2747,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

static int __init cpufreq_core_init(void)
{
if (cpufreq_disabled())
@@ -61,7 +61,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
if (!blk_queue_dax(bdev->bd_queue))
return NULL;
return fs_dax_get_by_host(bdev->bd_disk->disk_name);
return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
@ -505,16 +505,10 @@ void edac_mc_free(struct mem_ctl_info *mci)
|
||||
{
|
||||
edac_dbg(1, "\n");
|
||||
|
||||
/* If we're not yet registered with sysfs free only what was allocated
|
||||
* in edac_mc_alloc().
|
||||
*/
|
||||
if (!device_is_registered(&mci->dev)) {
|
||||
_edac_mc_free(mci);
|
||||
return;
|
||||
}
|
||||
if (device_is_registered(&mci->dev))
|
||||
edac_unregister_sysfs(mci);
|
||||
|
||||
/* the mci instance is freed here, when the sysfs object is dropped */
|
||||
edac_unregister_sysfs(mci);
|
||||
_edac_mc_free(mci);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(edac_mc_free);
|
||||
|
||||
|
@ -276,10 +276,7 @@ static const struct attribute_group *csrow_attr_groups[] = {
|
||||
|
||||
static void csrow_attr_release(struct device *dev)
|
||||
{
|
||||
struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
|
||||
|
||||
edac_dbg(1, "device %s released\n", dev_name(dev));
|
||||
kfree(csrow);
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type csrow_attr_type = {
|
||||
@ -447,8 +444,7 @@ error:
|
||||
csrow = mci->csrows[i];
|
||||
if (!nr_pages_per_csrow(csrow))
|
||||
continue;
|
||||
|
||||
device_del(&mci->csrows[i]->dev);
|
||||
device_unregister(&mci->csrows[i]->dev);
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -608,10 +604,7 @@ static const struct attribute_group *dimm_attr_groups[] = {
|
||||
|
||||
static void dimm_attr_release(struct device *dev)
|
||||
{
|
||||
struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
|
||||
|
||||
edac_dbg(1, "device %s released\n", dev_name(dev));
|
||||
kfree(dimm);
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type dimm_attr_type = {
|
||||
@ -893,10 +886,7 @@ static const struct attribute_group *mci_attr_groups[] = {
|
||||
|
||||
static void mci_attr_release(struct device *dev)
|
||||
{
|
||||
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
|
||||
|
||||
edac_dbg(1, "device %s released\n", dev_name(dev));
|
||||
kfree(mci);
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type mci_attr_type = {
|
||||
|
@ -10,16 +10,6 @@
|
||||
#define GPIO_OUT_REG(off) (BD71828_REG_GPIO_CTRL1 + (off))
|
||||
#define HALL_GPIO_OFFSET 3
|
||||
|
||||
/*
|
||||
* These defines can be removed when
|
||||
* "gpio: Add definition for GPIO direction"
|
||||
* (9208b1e77d6e8e9776f34f46ef4079ecac9c3c25 in GPIO tree) gets merged,
|
||||
*/
|
||||
#ifndef GPIO_LINE_DIRECTION_IN
|
||||
#define GPIO_LINE_DIRECTION_IN 1
|
||||
#define GPIO_LINE_DIRECTION_OUT 0
|
||||
#endif
|
||||
|
||||
struct bd71828_gpio {
|
||||
struct rohm_regmap_dev chip;
|
||||
struct gpio_chip gpio;
|
||||
|
@ -35,7 +35,7 @@ struct sifive_gpio {
|
||||
void __iomem *base;
|
||||
struct gpio_chip gc;
|
||||
struct regmap *regs;
|
||||
u32 irq_state;
|
||||
unsigned long irq_state;
|
||||
unsigned int trigger[SIFIVE_GPIO_MAX];
|
||||
unsigned int irq_parent[SIFIVE_GPIO_MAX];
|
||||
};
|
||||
@ -94,7 +94,7 @@ static void sifive_gpio_irq_enable(struct irq_data *d)
|
||||
spin_unlock_irqrestore(&gc->bgpio_lock, flags);
|
||||
|
||||
/* Enable interrupts */
|
||||
assign_bit(offset, (unsigned long *)&chip->irq_state, 1);
|
||||
assign_bit(offset, &chip->irq_state, 1);
|
||||
sifive_gpio_set_ie(chip, offset);
|
||||
}
|
||||
|
||||
@ -104,7 +104,7 @@ static void sifive_gpio_irq_disable(struct irq_data *d)
|
||||
struct sifive_gpio *chip = gpiochip_get_data(gc);
|
||||
int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
|
||||
|
||||
assign_bit(offset, (unsigned long *)&chip->irq_state, 0);
|
||||
assign_bit(offset, &chip->irq_state, 0);
|
||||
sifive_gpio_set_ie(chip, offset);
|
||||
irq_chip_disable_parent(d);
|
||||
}
|
||||
|
@ -147,9 +147,10 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
|
||||
for (i = 0; i < gc->ngpio; i++) {
|
||||
if (*mask == 0)
|
||||
break;
|
||||
/* Once finished with an index write it out to the register */
|
||||
if (index != xgpio_index(chip, i)) {
|
||||
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
|
||||
xgpio_regoffset(chip, i),
|
||||
index * XGPIO_CHANNEL_OFFSET,
|
||||
chip->gpio_state[index]);
|
||||
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
|
||||
index = xgpio_index(chip, i);
|
||||
@ -165,7 +166,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
|
||||
}
|
||||
|
||||
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
|
||||
xgpio_regoffset(chip, i), chip->gpio_state[index]);
|
||||
index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]);
|
||||
|
||||
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
|
||||
}
|
||||
|
@ -3035,13 +3035,33 @@ EXPORT_SYMBOL_GPL(gpiochip_free_own_desc);
|
||||
* rely on gpio_request() having been called beforehand.
|
||||
*/
|
||||
|
||||
static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
|
||||
enum pin_config_param mode)
|
||||
static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset,
|
||||
unsigned long config)
|
||||
{
|
||||
if (!gc->set_config)
|
||||
return -ENOTSUPP;
|
||||
|
||||
return gc->set_config(gc, offset, mode);
|
||||
return gc->set_config(gc, offset, config);
|
||||
}
|
||||
|
||||
static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
|
||||
enum pin_config_param mode)
|
||||
{
|
||||
unsigned long config;
|
||||
unsigned arg;
|
||||
|
||||
switch (mode) {
|
||||
case PIN_CONFIG_BIAS_PULL_DOWN:
|
||||
case PIN_CONFIG_BIAS_PULL_UP:
|
||||
arg = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
arg = 0;
|
||||
}
|
||||
|
||||
config = PIN_CONF_PACKED(mode, arg);
|
||||
return gpio_do_set_config(gc, offset, config);
|
||||
}
|
||||
|
||||
static int gpio_set_bias(struct gpio_chip *chip, struct gpio_desc *desc)
|
||||
@ -3277,7 +3297,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
|
||||
chip = desc->gdev->chip;
|
||||
|
||||
config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce);
|
||||
return gpio_set_config(chip, gpio_chip_hwgpio(desc), config);
|
||||
return gpio_do_set_config(chip, gpio_chip_hwgpio(desc), config);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gpiod_set_debounce);
|
||||
|
||||
@ -3311,7 +3331,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
|
||||
packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
|
||||
!transitory);
|
||||
gpio = gpio_chip_hwgpio(desc);
|
||||
rc = gpio_set_config(chip, gpio, packed);
|
||||
rc = gpio_do_set_config(chip, gpio, packed);
|
||||
if (rc == -ENOTSUPP) {
|
||||
dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
|
||||
gpio);
|
||||
|
@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event)
|
||||
return -ENOENT;
|
||||
|
||||
/* update the hw_perf_event struct with config data */
|
||||
hwc->conf = event->attr.config;
|
||||
hwc->config = event->attr.config;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
|
||||
switch (pe->pmu_perf_type) {
|
||||
case PERF_TYPE_AMDGPU_DF:
|
||||
if (!(flags & PERF_EF_RELOAD))
|
||||
pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
|
||||
pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1);
|
||||
|
||||
pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
|
||||
pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event)
|
||||
|
||||
switch (pe->pmu_perf_type) {
|
||||
case PERF_TYPE_AMDGPU_DF:
|
||||
pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
|
||||
pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config,
|
||||
&count);
|
||||
break;
|
||||
default:
|
||||
@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
|
||||
|
||||
switch (pe->pmu_perf_type) {
|
||||
case PERF_TYPE_AMDGPU_DF:
|
||||
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
|
||||
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -156,7 +156,8 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
|
||||
|
||||
switch (pe->pmu_perf_type) {
|
||||
case PERF_TYPE_AMDGPU_DF:
|
||||
retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
|
||||
retval = pe->adev->df.funcs->pmc_start(pe->adev,
|
||||
hwc->config, 1);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
@ -184,7 +185,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
|
||||
|
||||
switch (pe->pmu_perf_type) {
|
||||
case PERF_TYPE_AMDGPU_DF:
|
||||
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
|
||||
pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -179,6 +179,7 @@ struct amdgpu_vcn_inst {
|
||||
struct amdgpu_irq_src irq;
|
||||
struct amdgpu_vcn_reg external;
|
||||
struct amdgpu_bo *dpg_sram_bo;
|
||||
struct dpg_pause_state pause_state;
|
||||
void *dpg_sram_cpu_addr;
|
||||
uint64_t dpg_sram_gpu_addr;
|
||||
uint32_t *dpg_sram_curr_addr;
|
||||
@ -190,8 +191,6 @@ struct amdgpu_vcn {
|
||||
const struct firmware *fw; /* VCN firmware */
|
||||
unsigned num_enc_rings;
|
||||
enum amd_powergating_state cur_state;
|
||||
struct dpg_pause_state pause_state;
|
||||
|
||||
bool indirect_sram;
|
||||
|
||||
uint8_t num_vcn_inst;
|
||||
|
@ -4374,9 +4374,17 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int r;
|
||||
|
||||
r = gfx_v9_0_do_edc_gds_workarounds(adev);
|
||||
if (r)
|
||||
return r;
|
||||
/*
|
||||
* Temp workaround to fix the issue that CP firmware fails to
|
||||
* update read pointer when CPDMA is writing clearing operation
|
||||
* to GDS in suspend/resume sequence on several cards. So just
|
||||
* limit this operation in cold boot sequence.
|
||||
*/
|
||||
if (!adev->in_suspend) {
|
||||
r = gfx_v9_0_do_edc_gds_workarounds(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* requires IBs so do in late init after IB pool is initialized */
|
||||
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
|
||||
|
@ -1207,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based,
|
||||
adev->vcn.inst[inst_idx].pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
@ -1258,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
|
||||
}
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
|
||||
if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based,
|
||||
adev->vcn.inst[inst_idx].pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
@ -1318,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.jpeg = new_state->jpeg;
|
||||
adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1137,9 +1137,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
int ret_code;
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
|
||||
DRM_DEBUG("dpg pause state changed %d -> %d",
|
||||
adev->vcn.pause_state.fw_based, new_state->fw_based);
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
@ -1185,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1367,9 +1367,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
int ret_code;
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
|
||||
DRM_DEBUG("dpg pause state changed %d -> %d",
|
||||
adev->vcn.pause_state.fw_based, new_state->fw_based);
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
|
||||
reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
@ -1407,14 +1407,14 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
|
||||
SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
|
||||
0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
}
|
||||
} else {
|
||||
/* unpause dpg, no need to wait */
|
||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -8408,7 +8408,6 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
|
||||
/* Calculate number of static frames before generating interrupt to
|
||||
* enter PSR.
|
||||
*/
|
||||
unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
|
||||
// Init fail safe of 2 frames static
|
||||
unsigned int num_frames_static = 2;
|
||||
|
||||
@ -8423,8 +8422,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
|
||||
* Calculate number of frames such that at least 30 ms of time has
|
||||
* passed.
|
||||
*/
|
||||
if (vsync_rate_hz != 0)
|
||||
if (vsync_rate_hz != 0) {
|
||||
unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
|
||||
num_frames_static = (30000 / frame_time_microsec) + 1;
|
||||
}
|
||||
|
||||
params.triggers.cursor_update = true;
|
||||
params.triggers.overlay_update = true;
|
||||
|
@ -711,10 +711,6 @@ static void enable_disp_power_gating_dmcub(
|
||||
power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
|
||||
power_gating.power_gating.pwr = *pwr;
|
||||
|
||||
/* ATOM_ENABLE is old API in DMUB */
|
||||
if (power_gating.power_gating.pwr.enable == ATOM_ENABLE)
|
||||
power_gating.power_gating.pwr.enable = ATOM_INIT;
|
||||
|
||||
dc_dmub_srv_cmd_queue(dmcub, &power_gating.header);
|
||||
dc_dmub_srv_cmd_execute(dmcub);
|
||||
dc_dmub_srv_wait_idle(dmcub);
|
||||
|
@@ -87,6 +87,12 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN20)
###############################################################################
CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o

# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to Renoir APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif

AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))

AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
@ -117,7 +117,7 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
|
||||
|
||||
prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
|
||||
|
||||
if (safe_to_lower || prev_dppclk_khz < dppclk_khz) {
|
||||
if ((prev_dppclk_khz > dppclk_khz && safe_to_lower) || prev_dppclk_khz < dppclk_khz) {
|
||||
clk_mgr->dccg->funcs->update_dpp_dto(
|
||||
clk_mgr->dccg, dpp_inst, dppclk_khz);
|
||||
}
|
||||
|
@ -151,6 +151,12 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
|
||||
rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
|
||||
}
|
||||
|
||||
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
|
||||
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
|
||||
if (new_clocks->dppclk_khz < 100000)
|
||||
new_clocks->dppclk_khz = 100000;
|
||||
}
|
||||
|
||||
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
|
||||
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
|
||||
dpp_clock_lowered = true;
|
||||
@ -412,19 +418,19 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra
|
||||
|
||||
ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst;
|
||||
ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;
|
||||
/* We will not select WM based on dcfclk, so leave it as unconstrained */
|
||||
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
|
||||
ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
|
||||
/* fclk wil be used to select WM*/
|
||||
/* We will not select WM based on fclk, so leave it as unconstrained */
|
||||
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
|
||||
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
|
||||
/* dcfclk wil be used to select WM*/
|
||||
|
||||
if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) {
|
||||
if (i == 0)
|
||||
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0;
|
||||
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = 0;
|
||||
else {
|
||||
/* add 1 to make it non-overlapping with next lvl */
|
||||
ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1;
|
||||
ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
|
||||
}
|
||||
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
|
||||
ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
|
||||
|
||||
} else {
|
||||
/* unconstrained for memory retraining */
|
||||
|
@@ -400,7 +400,7 @@ static bool acquire(
{
enum gpio_result result;

if (!is_engine_available(engine))
if ((engine == NULL) || !is_engine_available(engine))
return false;

result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE,
@ -572,7 +572,6 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
|
||||
dpp->funcs->dpp_dppclk_control(dpp, false, false);
|
||||
|
||||
hubp->power_gated = true;
|
||||
dc->optimized_required = false; /* We're powering off, no need to optimize */
|
||||
|
||||
hws->funcs.plane_atomic_power_down(dc,
|
||||
pipe_ctx->plane_res.dpp,
|
||||
|
@ -60,6 +60,7 @@
|
||||
#include "dcn20/dcn20_dccg.h"
|
||||
#include "dcn21_hubbub.h"
|
||||
#include "dcn10/dcn10_resource.h"
|
||||
#include "dce110/dce110_resource.h"
|
||||
|
||||
#include "dcn20/dcn20_dwb.h"
|
||||
#include "dcn20/dcn20_mmhubbub.h"
|
||||
@ -856,6 +857,7 @@ static const struct dc_debug_options debug_defaults_diags = {
|
||||
enum dcn20_clk_src_array_id {
|
||||
DCN20_CLK_SRC_PLL0,
|
||||
DCN20_CLK_SRC_PLL1,
|
||||
DCN20_CLK_SRC_PLL2,
|
||||
DCN20_CLK_SRC_TOTAL_DCN21
|
||||
};
|
||||
|
||||
@ -1718,6 +1720,10 @@ static bool dcn21_resource_construct(
|
||||
dcn21_clock_source_create(ctx, ctx->dc_bios,
|
||||
CLOCK_SOURCE_COMBO_PHY_PLL1,
|
||||
&clk_src_regs[1], false);
|
||||
pool->base.clock_sources[DCN20_CLK_SRC_PLL2] =
|
||||
dcn21_clock_source_create(ctx, ctx->dc_bios,
|
||||
CLOCK_SOURCE_COMBO_PHY_PLL2,
|
||||
&clk_src_regs[2], false);
|
||||
|
||||
pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21;
|
||||
|
||||
|
@@ -39,21 +39,39 @@
 #define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800
 #define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100
 
+enum SMU_11_0_ODFEATURE_CAP {
+    SMU_11_0_ODCAP_GFXCLK_LIMITS = 0,
+    SMU_11_0_ODCAP_GFXCLK_CURVE,
+    SMU_11_0_ODCAP_UCLK_MAX,
+    SMU_11_0_ODCAP_POWER_LIMIT,
+    SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT,
+    SMU_11_0_ODCAP_FAN_SPEED_MIN,
+    SMU_11_0_ODCAP_TEMPERATURE_FAN,
+    SMU_11_0_ODCAP_TEMPERATURE_SYSTEM,
+    SMU_11_0_ODCAP_MEMORY_TIMING_TUNE,
+    SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL,
+    SMU_11_0_ODCAP_AUTO_UV_ENGINE,
+    SMU_11_0_ODCAP_AUTO_OC_ENGINE,
+    SMU_11_0_ODCAP_AUTO_OC_MEMORY,
+    SMU_11_0_ODCAP_FAN_CURVE,
+    SMU_11_0_ODCAP_COUNT,
+};
+
 enum SMU_11_0_ODFEATURE_ID {
-    SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature
-    SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature
-    SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature
-    SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature
-    SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature
-    SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature
-    SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature
-    SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature
-    SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature
-    SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature
-    SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature
-    SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature
-    SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature
-    SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO
+    SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_11_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature
+    SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << SMU_11_0_ODCAP_GFXCLK_CURVE, //GFXCLK Curve feature
+    SMU_11_0_ODFEATURE_UCLK_MAX = 1 << SMU_11_0_ODCAP_UCLK_MAX, //UCLK Limit feature
+    SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << SMU_11_0_ODCAP_POWER_LIMIT, //Power Limit feature
+    SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature
+    SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_11_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature
+    SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_11_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature
+    SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature
+    SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature
+    SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature
+    SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature
+    SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature
+    SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_11_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature
+    SMU_11_0_ODFEATURE_FAN_CURVE = 1 << SMU_11_0_ODCAP_FAN_CURVE, //Fan Curve feature
     SMU_11_0_ODFEATURE_COUNT = 14,
 };
 #define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features
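Aside: the overdrive refactor above derives every SMU_11_0_ODFEATURE_* mask from its SMU_11_0_ODCAP_* index instead of a hard-coded shift, so the bitmask enum and the capability-table index can no longer drift apart. A standalone sketch of that pattern with generic names (not the SMU headers):

#include <assert.h>
#include <stdio.h>

/* Capability indices: one array slot per feature. */
enum od_cap { ODCAP_GFXCLK_LIMITS = 0, ODCAP_GFXCLK_CURVE, ODCAP_UCLK_MAX, ODCAP_COUNT };

/* Feature masks derived from the indices, so they always stay in sync. */
enum od_feature {
	ODFEATURE_GFXCLK_LIMITS = 1u << ODCAP_GFXCLK_LIMITS,
	ODFEATURE_GFXCLK_CURVE  = 1u << ODCAP_GFXCLK_CURVE,
	ODFEATURE_UCLK_MAX      = 1u << ODCAP_UCLK_MAX,
};

/* A per-cap table can be indexed directly by the capability enum. */
static const unsigned char cap_supported[ODCAP_COUNT] = { 1, 0, 1 };

int main(void)
{
	assert(ODFEATURE_UCLK_MAX == (1u << ODCAP_UCLK_MAX));
	printf("GFXCLK_CURVE supported: %d\n", cap_supported[ODCAP_GFXCLK_CURVE]);
	return 0;
}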
@@ -736,9 +736,9 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu
 	return dpm_desc->SnapToDiscrete == 0 ? true : false;
 }
 
-static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature)
+static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_CAP cap)
 {
-	return od_table->cap[feature];
+	return od_table->cap[cap];
 }
 
 static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table,
@@ -846,7 +846,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	case SMU_OD_SCLK:
 		if (!smu->od_enabled || !od_table || !od_settings)
 			break;
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS))
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS))
 			break;
 		size += sprintf(buf + size, "OD_SCLK:\n");
 		size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
@@ -854,7 +854,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	case SMU_OD_MCLK:
 		if (!smu->od_enabled || !od_table || !od_settings)
 			break;
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX))
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX))
 			break;
 		size += sprintf(buf + size, "OD_MCLK:\n");
 		size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax);
@@ -862,7 +862,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	case SMU_OD_VDDC_CURVE:
 		if (!smu->od_enabled || !od_table || !od_settings)
 			break;
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE))
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE))
 			break;
 		size += sprintf(buf + size, "OD_VDDC_CURVE:\n");
 		for (i = 0; i < 3; i++) {
@@ -887,7 +887,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 			break;
 		size = sprintf(buf, "%s:\n", "OD_RANGE");
 
-		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
 			navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN,
 						    &min_value, NULL);
 			navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX,
@@ -896,14 +896,14 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 					min_value, max_value);
 		}
 
-		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
 			navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX,
 						    &min_value, &max_value);
 			size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
 					min_value, max_value);
 		}
 
-		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+		if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
 			navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1,
 						    &min_value, &max_value);
 			size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
@@ -2056,7 +2056,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
 
 	switch (type) {
 	case PP_OD_EDIT_SCLK_VDDC_TABLE:
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
 			pr_warn("GFXCLK_LIMITS not supported!\n");
 			return -ENOTSUPP;
 		}
@@ -2102,7 +2102,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
 		}
 		break;
 	case PP_OD_EDIT_MCLK_VDDC_TABLE:
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
 			pr_warn("UCLK_MAX not supported!\n");
 			return -ENOTSUPP;
 		}
@@ -2143,7 +2143,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
 		}
 		break;
 	case PP_OD_EDIT_VDDC_CURVE:
-		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
 			pr_warn("GFXCLK_CURVE not supported!\n");
 			return -ENOTSUPP;
 		}
@@ -3838,7 +3838,8 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
 		else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY)
 			guid = msg->u.resource_stat.guid;
 
-		mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
+		if (guid)
+			mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
 	} else {
 		mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
 	}
@@ -3211,7 +3211,7 @@ static u8 *drm_find_cea_extension(const struct edid *edid)
 	return cea;
 }
 
-static const struct drm_display_mode *cea_mode_for_vic(u8 vic)
+static __always_inline const struct drm_display_mode *cea_mode_for_vic(u8 vic)
 {
 	BUILD_BUG_ON(1 + ARRAY_SIZE(edid_cea_modes_1) - 1 != 127);
 	BUILD_BUG_ON(193 + ARRAY_SIZE(edid_cea_modes_193) - 1 != 219);
@@ -357,14 +357,16 @@ parse_generic_dtd(struct drm_i915_private *dev_priv,
 		panel_fixed_mode->hdisplay + dtd->hfront_porch;
 	panel_fixed_mode->hsync_end =
 		panel_fixed_mode->hsync_start + dtd->hsync;
-	panel_fixed_mode->htotal = panel_fixed_mode->hsync_end;
+	panel_fixed_mode->htotal =
+		panel_fixed_mode->hdisplay + dtd->hblank;
 
 	panel_fixed_mode->vdisplay = dtd->vactive;
 	panel_fixed_mode->vsync_start =
 		panel_fixed_mode->vdisplay + dtd->vfront_porch;
 	panel_fixed_mode->vsync_end =
 		panel_fixed_mode->vsync_start + dtd->vsync;
-	panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end;
+	panel_fixed_mode->vtotal =
+		panel_fixed_mode->vdisplay + dtd->vblank;
 
 	panel_fixed_mode->clock = dtd->pixel_clock;
 	panel_fixed_mode->width_mm = dtd->width_mm;
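Aside: the parse_generic_dtd() fix above computes htotal/vtotal from the blanking interval rather than from the sync end, which would silently drop the back porch. A small numeric sketch with made-up DTD values:

#include <stdio.h>

int main(void)
{
	/* Hypothetical DTD fields. */
	unsigned int hactive = 1920, hfront = 48, hsync = 32, hblank = 160;

	unsigned int hsync_start = hactive + hfront;	/* 1968 */
	unsigned int hsync_end   = hsync_start + hsync;	/* 2000 */
	unsigned int htotal      = hactive + hblank;	/* 2080 */

	/* Using hsync_end as htotal would lose the back porch (2080 - 2000 = 80 px). */
	printf("hsync_end=%u htotal=%u back_porch=%u\n",
	       hsync_end, htotal, htotal - hsync_end);
	return 0;
}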
@@ -12366,6 +12366,7 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 		/* Copy parameters to slave plane */
 		linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE;
+		linked_state->color_ctl = plane_state->color_ctl;
 		linked_state->view = plane_state->view;
 		memcpy(linked_state->color_plane, plane_state->color_plane,
 		       sizeof(linked_state->color_plane));
 
@@ -14476,37 +14477,23 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
 	return 0;
 }
 
-static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state,
-					       enum transcoder transcoder)
+static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state,
+					       u8 transcoders)
 {
-	struct intel_crtc_state *new_crtc_state;
+	const struct intel_crtc_state *new_crtc_state;
 	struct intel_crtc *crtc;
 	int i;
 
-	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
-		if (new_crtc_state->cpu_transcoder == transcoder)
-			return needs_modeset(new_crtc_state);
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (new_crtc_state->hw.enable &&
+		    transcoders & BIT(new_crtc_state->cpu_transcoder) &&
+		    needs_modeset(new_crtc_state))
+			return true;
+	}
 
 	return false;
 }
 
-static void
-intel_modeset_synced_crtcs(struct intel_atomic_state *state,
-			   u8 transcoders)
-{
-	struct intel_crtc_state *new_crtc_state;
-	struct intel_crtc *crtc;
-	int i;
-
-	for_each_new_intel_crtc_in_state(state, crtc,
-					 new_crtc_state, i) {
-		if (transcoders & BIT(new_crtc_state->cpu_transcoder)) {
-			new_crtc_state->uapi.mode_changed = true;
-			new_crtc_state->update_pipe = false;
-		}
-	}
-}
-
 static int
 intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id)
 {
@@ -14662,15 +14649,20 @@ static int intel_atomic_check(struct drm_device *dev,
 		if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
 			enum transcoder master = new_crtc_state->mst_master_transcoder;
 
-			if (intel_cpu_transcoder_needs_modeset(state, master)) {
+			if (intel_cpu_transcoders_need_modeset(state, BIT(master))) {
 				new_crtc_state->uapi.mode_changed = true;
 				new_crtc_state->update_pipe = false;
 			}
-		} else if (is_trans_port_sync_mode(new_crtc_state)) {
+		}
+
+		if (is_trans_port_sync_mode(new_crtc_state)) {
 			u8 trans = new_crtc_state->sync_mode_slaves_mask |
 				   BIT(new_crtc_state->master_transcoder);
 
-			intel_modeset_synced_crtcs(state, trans);
+			if (intel_cpu_transcoders_need_modeset(state, trans)) {
+				new_crtc_state->uapi.mode_changed = true;
+				new_crtc_state->update_pipe = false;
+			}
 		}
 	}
 
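Aside: both call sites above now funnel through one helper that takes a transcoder bitmask — BIT(master) for the MST slave case, master plus slaves for port sync. A generic sketch of checking a set of indices against such a mask (illustrative types, not the i915 structures):

#include <stdbool.h>
#include <stdio.h>

struct crtc_state { bool enabled; unsigned int transcoder; bool needs_modeset; };

/* Return true if any enabled CRTC whose transcoder is in 'mask' needs a full modeset. */
static bool transcoders_need_modeset(const struct crtc_state *s, int n, unsigned int mask)
{
	for (int i = 0; i < n; i++)
		if (s[i].enabled && (mask & (1u << s[i].transcoder)) && s[i].needs_modeset)
			return true;
	return false;
}

int main(void)
{
	struct crtc_state states[] = {
		{ true, 0, false },
		{ true, 1, true },
	};
	unsigned int mask = (1u << 0) | (1u << 1);	/* master + slave */

	printf("%d\n", transcoders_need_modeset(states, 2, mask));	/* prints 1 */
	return 0;
}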
@@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
+#ifdef CONFIG_ACPI
 static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 {
 	struct i2c_adapter_lookup *lookup = data;
@@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 	acpi_handle adapter_handle;
 	acpi_status status;
 
-	if (intel_dsi->i2c_bus_num >= 0 ||
-	    !i2c_acpi_get_i2c_resource(ares, &sb))
+	if (!i2c_acpi_get_i2c_resource(ares, &sb))
 		return 1;
 
 	if (lookup->slave_addr != sb->slave_address)
@@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 	return 1;
 }
 
+static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+				  const u16 slave_addr)
+{
+	struct drm_device *drm_dev = intel_dsi->base.base.dev;
+	struct device *dev = &drm_dev->pdev->dev;
+	struct acpi_device *acpi_dev;
+	struct list_head resource_list;
+	struct i2c_adapter_lookup lookup;
+
+	acpi_dev = ACPI_COMPANION(dev);
+	if (acpi_dev) {
+		memset(&lookup, 0, sizeof(lookup));
+		lookup.slave_addr = slave_addr;
+		lookup.intel_dsi = intel_dsi;
+		lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+		INIT_LIST_HEAD(&resource_list);
+		acpi_dev_get_resources(acpi_dev, &resource_list,
+				       i2c_adapter_lookup,
+				       &lookup);
+		acpi_dev_free_resource_list(&resource_list);
+	}
+}
+#else
+static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+					 const u16 slave_addr)
+{
+}
+#endif
+
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
 	struct drm_device *drm_dev = intel_dsi->base.base.dev;
 	struct device *dev = &drm_dev->pdev->dev;
 	struct i2c_adapter *adapter;
-	struct acpi_device *acpi_dev;
-	struct list_head resource_list;
-	struct i2c_adapter_lookup lookup;
 	struct i2c_msg msg;
 	int ret;
 	u8 vbt_i2c_bus_num = *(data + 2);
@@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 
 	if (intel_dsi->i2c_bus_num < 0) {
 		intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
-
-		acpi_dev = ACPI_COMPANION(dev);
-		if (acpi_dev) {
-			memset(&lookup, 0, sizeof(lookup));
-			lookup.slave_addr = slave_addr;
-			lookup.intel_dsi = intel_dsi;
-			lookup.dev_handle = acpi_device_handle(acpi_dev);
-
-			INIT_LIST_HEAD(&resource_list);
-			acpi_dev_get_resources(acpi_dev, &resource_list,
-					       i2c_adapter_lookup,
-					       &lookup);
-			acpi_dev_free_resource_list(&resource_list);
-		}
+		i2c_acpi_find_adapter(intel_dsi, slave_addr);
 	}
 
 	adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
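Aside: the refactor above moves the ACPI adapter lookup out of mipi_exec_i2c() into i2c_acpi_find_adapter(), with an empty inline stub when CONFIG_ACPI is disabled so the caller needs no #ifdef. A minimal sketch of that compile-time stub pattern with hypothetical names:

#include <stdio.h>

#ifdef HAVE_FEATURE
static void find_adapter(int bus)
{
	printf("probing bus %d via the optional backend\n", bus);
}
#else
/* No-op stub: callers stay free of #ifdefs. */
static inline void find_adapter(int bus)
{
	(void)bus;
}
#endif

int main(void)
{
	find_adapter(3);	/* compiles and links either way */
	return 0;
}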
@@ -1981,9 +1981,20 @@ static int __eb_parse(struct dma_fence_work *work)
 			       pw->trampoline);
 }
 
+static void __eb_parse_release(struct dma_fence_work *work)
+{
+	struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+	if (pw->trampoline)
+		i915_active_release(&pw->trampoline->active);
+	i915_active_release(&pw->shadow->active);
+	i915_active_release(&pw->batch->active);
+}
+
 static const struct dma_fence_work_ops eb_parse_ops = {
 	.name = "eb_parse",
 	.work = __eb_parse,
+	.release = __eb_parse_release,
 };
 
 static int eb_parse_pipeline(struct i915_execbuffer *eb,
@@ -1997,6 +2008,20 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	if (!pw)
 		return -ENOMEM;
 
+	err = i915_active_acquire(&eb->batch->active);
+	if (err)
+		goto err_free;
+
+	err = i915_active_acquire(&shadow->active);
+	if (err)
+		goto err_batch;
+
+	if (trampoline) {
+		err = i915_active_acquire(&trampoline->active);
+		if (err)
+			goto err_shadow;
+	}
+
 	dma_fence_work_init(&pw->base, &eb_parse_ops);
 
 	pw->engine = eb->engine;
@@ -2006,7 +2031,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	pw->shadow = shadow;
 	pw->trampoline = trampoline;
 
-	dma_resv_lock(pw->batch->resv, NULL);
+	err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
+	if (err)
+		goto err_trampoline;
 
 	err = dma_resv_reserve_shared(pw->batch->resv, 1);
 	if (err)
@@ -2034,6 +2061,14 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 
 err_batch_unlock:
 	dma_resv_unlock(pw->batch->resv);
+err_trampoline:
+	if (trampoline)
+		i915_active_release(&trampoline->active);
+err_shadow:
+	i915_active_release(&shadow->active);
+err_batch:
+	i915_active_release(&eb->batch->active);
+err_free:
 	kfree(pw);
 	return err;
 }
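Aside: eb_parse_pipeline() above acquires batch, shadow and trampoline in order and unwinds in reverse through the new error labels, so a failure at any step releases exactly what was already taken. A self-contained sketch of that goto-unwind convention with generic resources (not the i915 API):

#include <stdio.h>
#include <stdlib.h>

static int acquire(const char *name) { printf("acquire %s\n", name); return 0; }
static void release(const char *name) { printf("release %s\n", name); }

static int setup(void)
{
	int err;

	err = acquire("batch");
	if (err)
		goto err_out;

	err = acquire("shadow");
	if (err)
		goto err_batch;

	err = acquire("trampoline");
	if (err)
		goto err_shadow;

	return 0;	/* success: caller now owns all three */

err_shadow:
	release("shadow");
err_batch:
	release("batch");
err_out:
	return err;
}

int main(void)
{
	return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}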
@@ -455,10 +455,11 @@ out:
 
 void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
 {
-	struct i915_mmap_offset *mmo;
+	struct i915_mmap_offset *mmo, *mn;
 
 	spin_lock(&obj->mmo.lock);
-	list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
+	rbtree_postorder_for_each_entry_safe(mmo, mn,
+					     &obj->mmo.offsets, offset) {
 		/*
 		 * vma_node_unmap for GTT mmaps handled already in
 		 * __i915_gem_object_release_mmap_gtt
@@ -487,6 +488,67 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
 	i915_gem_object_release_mmap_offset(obj);
 }
 
+static struct i915_mmap_offset *
+lookup_mmo(struct drm_i915_gem_object *obj,
+	   enum i915_mmap_type mmap_type)
+{
+	struct rb_node *rb;
+
+	spin_lock(&obj->mmo.lock);
+	rb = obj->mmo.offsets.rb_node;
+	while (rb) {
+		struct i915_mmap_offset *mmo =
+			rb_entry(rb, typeof(*mmo), offset);
+
+		if (mmo->mmap_type == mmap_type) {
+			spin_unlock(&obj->mmo.lock);
+			return mmo;
+		}
+
+		if (mmo->mmap_type < mmap_type)
+			rb = rb->rb_right;
+		else
+			rb = rb->rb_left;
+	}
+	spin_unlock(&obj->mmo.lock);
+
+	return NULL;
+}
+
+static struct i915_mmap_offset *
+insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
+{
+	struct rb_node *rb, **p;
+
+	spin_lock(&obj->mmo.lock);
+	rb = NULL;
+	p = &obj->mmo.offsets.rb_node;
+	while (*p) {
+		struct i915_mmap_offset *pos;
+
+		rb = *p;
+		pos = rb_entry(rb, typeof(*pos), offset);
+
+		if (pos->mmap_type == mmo->mmap_type) {
+			spin_unlock(&obj->mmo.lock);
+			drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+					      &mmo->vma_node);
+			kfree(mmo);
+			return pos;
+		}
+
+		if (pos->mmap_type < mmo->mmap_type)
+			p = &rb->rb_right;
+		else
+			p = &rb->rb_left;
+	}
+	rb_link_node(&mmo->offset, rb, p);
+	rb_insert_color(&mmo->offset, &obj->mmo.offsets);
+	spin_unlock(&obj->mmo.lock);
+
+	return mmo;
+}
+
 static struct i915_mmap_offset *
 mmap_offset_attach(struct drm_i915_gem_object *obj,
 		   enum i915_mmap_type mmap_type,
@@ -496,20 +558,22 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
 	struct i915_mmap_offset *mmo;
 	int err;
 
+	mmo = lookup_mmo(obj, mmap_type);
+	if (mmo)
+		goto out;
+
 	mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
 	if (!mmo)
 		return ERR_PTR(-ENOMEM);
 
 	mmo->obj = obj;
-	mmo->dev = obj->base.dev;
 	mmo->file = file;
 	mmo->mmap_type = mmap_type;
 	drm_vma_node_reset(&mmo->vma_node);
 
-	err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
-				 obj->base.size / PAGE_SIZE);
+	err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+				 &mmo->vma_node, obj->base.size / PAGE_SIZE);
 	if (likely(!err))
-		goto out;
+		goto insert;
 
 	/* Attempt to reap some mmap space from dead objects */
 	err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
@@ -517,19 +581,17 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
 		goto err;
 
 	i915_gem_drain_freed_objects(i915);
-	err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
-				 obj->base.size / PAGE_SIZE);
+	err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+				 &mmo->vma_node, obj->base.size / PAGE_SIZE);
 	if (err)
 		goto err;
 
+insert:
+	mmo = insert_mmo(obj, mmo);
+	GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
+
 out:
 	if (file)
 		drm_vma_node_allow(&mmo->vma_node, file);
 
-	spin_lock(&obj->mmo.lock);
-	list_add(&mmo->offset, &obj->mmo.offsets);
-	spin_unlock(&obj->mmo.lock);
-
 	return mmo;
 
 err:
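Aside: mmap_offset_attach() above first tries lookup_mmo(); if it has to allocate, insert_mmo() may find that a concurrent attach already published a node of the same type, in which case the fresh allocation is freed and the existing node is returned. A single-threaded sketch of that lookup-or-insert contract on a sorted linked list (the driver uses an rbtree under obj->mmo.lock):

#include <stdio.h>
#include <stdlib.h>

struct node { int key; struct node *next; };

/* Insert 'n' keyed by n->key; if the key already exists, free 'n' and return the old node. */
static struct node *insert_or_reuse(struct node **head, struct node *n)
{
	struct node **p = head;

	while (*p && (*p)->key < n->key)
		p = &(*p)->next;
	if (*p && (*p)->key == n->key) {
		free(n);		/* lost the race: reuse the existing entry */
		return *p;
	}
	n->next = *p;
	*p = n;
	return n;
}

int main(void)
{
	struct node *head = NULL;
	struct node *a = calloc(1, sizeof(*a));
	struct node *b = calloc(1, sizeof(*b));

	a->key = b->key = 42;
	struct node *first = insert_or_reuse(&head, a);
	struct node *second = insert_or_reuse(&head, b);	/* returns 'a', frees 'b' */
	printf("%s\n", first == second ? "reused" : "duplicated");
	free(first);
	return 0;
}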
@@ -745,60 +807,43 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct drm_vma_offset_node *node;
 	struct drm_file *priv = filp->private_data;
 	struct drm_device *dev = priv->minor->dev;
+	struct drm_i915_gem_object *obj = NULL;
 	struct i915_mmap_offset *mmo = NULL;
-	struct drm_gem_object *obj = NULL;
 	struct file *anon;
 
 	if (drm_dev_is_unplugged(dev))
 		return -ENODEV;
 
+	rcu_read_lock();
 	drm_vma_offset_lock_lookup(dev->vma_offset_manager);
 	node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
 						  vma->vm_pgoff,
 						  vma_pages(vma));
-	if (likely(node)) {
-		mmo = container_of(node, struct i915_mmap_offset,
-				   vma_node);
-		/*
-		 * In our dependency chain, the drm_vma_offset_node
-		 * depends on the validity of the mmo, which depends on
-		 * the gem object. However the only reference we have
-		 * at this point is the mmo (as the parent of the node).
-		 * Try to check if the gem object was at least cleared.
-		 */
-		if (!mmo || !mmo->obj) {
-			drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
-			return -EINVAL;
-		}
+	if (node && drm_vma_node_is_allowed(node, priv)) {
 		/*
 		 * Skip 0-refcnted objects as it is in the process of being
 		 * destroyed and will be invalid when the vma manager lock
 		 * is released.
 		 */
-		obj = &mmo->obj->base;
-		if (!kref_get_unless_zero(&obj->refcount))
-			obj = NULL;
+		mmo = container_of(node, struct i915_mmap_offset, vma_node);
+		obj = i915_gem_object_get_rcu(mmo->obj);
 	}
 	drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+	rcu_read_unlock();
 	if (!obj)
-		return -EINVAL;
+		return node ? -EACCES : -EINVAL;
 
-	if (!drm_vma_node_is_allowed(node, priv)) {
-		drm_gem_object_put_unlocked(obj);
-		return -EACCES;
-	}
-
-	if (i915_gem_object_is_readonly(to_intel_bo(obj))) {
+	if (i915_gem_object_is_readonly(obj)) {
 		if (vma->vm_flags & VM_WRITE) {
-			drm_gem_object_put_unlocked(obj);
+			i915_gem_object_put(obj);
 			return -EINVAL;
 		}
 		vma->vm_flags &= ~VM_MAYWRITE;
 	}
 
-	anon = mmap_singleton(to_i915(obj->dev));
+	anon = mmap_singleton(to_i915(dev));
 	if (IS_ERR(anon)) {
-		drm_gem_object_put_unlocked(obj);
+		i915_gem_object_put(obj);
 		return PTR_ERR(anon);
 	}
 
@@ -63,7 +63,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->lut_list);
 
 	spin_lock_init(&obj->mmo.lock);
-	INIT_LIST_HEAD(&obj->mmo.offsets);
+	obj->mmo.offsets = RB_ROOT;
 
 	init_rcu_head(&obj->rcu);
 
@@ -100,8 +100,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_mmap_offset *mmo, *mn;
 	struct i915_lut_handle *lut, *ln;
-	struct i915_mmap_offset *mmo;
 	LIST_HEAD(close);
 
 	i915_gem_object_lock(obj);
@@ -117,14 +117,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	i915_gem_object_unlock(obj);
 
 	spin_lock(&obj->mmo.lock);
-	list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
-		if (mmo->file != file)
-			continue;
-
-		spin_unlock(&obj->mmo.lock);
+	rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
 		drm_vma_node_revoke(&mmo->vma_node, file);
-		spin_lock(&obj->mmo.lock);
-	}
 	spin_unlock(&obj->mmo.lock);
 
 	list_for_each_entry_safe(lut, ln, &close, obj_link) {
@@ -203,12 +197,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		i915_gem_object_release_mmap(obj);
 
-		list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) {
+		rbtree_postorder_for_each_entry_safe(mmo, mn,
+						     &obj->mmo.offsets,
+						     offset) {
 			drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
 					      &mmo->vma_node);
 			kfree(mmo);
 		}
-		INIT_LIST_HEAD(&obj->mmo.offsets);
+		obj->mmo.offsets = RB_ROOT;
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
@@ -69,6 +69,15 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
 	return idr_find(&file->object_idr, handle);
 }
 
+static inline struct drm_i915_gem_object *
+i915_gem_object_get_rcu(struct drm_i915_gem_object *obj)
+{
+	if (obj && !kref_get_unless_zero(&obj->base.refcount))
+		obj = NULL;
+
+	return obj;
+}
+
 static inline struct drm_i915_gem_object *
 i915_gem_object_lookup(struct drm_file *file, u32 handle)
 {
@@ -76,8 +85,7 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle)
 
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, handle);
-	if (obj && !kref_get_unless_zero(&obj->base.refcount))
-		obj = NULL;
+	obj = i915_gem_object_get_rcu(obj);
 	rcu_read_unlock();
 
 	return obj;
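Aside: the new i915_gem_object_get_rcu() helper wraps the kref_get_unless_zero() idiom — under RCU, take a reference only if the count has not already hit zero, i.e. the object is not mid-destruction. A userspace sketch of the same compare-and-swap loop using C11 atomics (illustrative, not the kernel's kref):

#include <stdatomic.h>
#include <stdio.h>

struct object { atomic_uint refcount; };

/* Take a reference only if the count is still non-zero; return 0 if the object is dying. */
static int get_unless_zero(struct object *obj)
{
	unsigned int old = atomic_load(&obj->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return 1;
		/* 'old' was reloaded by the failed CAS; retry. */
	}
	return 0;
}

int main(void)
{
	struct object live = { .refcount = 2 };
	struct object dying = { .refcount = 0 };

	printf("live: %d, dying: %d\n", get_unless_zero(&live), get_unless_zero(&dying));
	return 0;
}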