mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-19 18:24:14 +08:00
Linux 5.6-rc4
-----BEGIN PGP SIGNATURE----- iQFSBAABCAA8FiEEq68RxlopcLEwq+PEeb4+QwBBGIYFAl5cOX8eHHRvcnZhbGRz QGxpbnV4LWZvdW5kYXRpb24ub3JnAAoJEHm+PkMAQRiGw0AH/0nex1uEpzUTm+Gw D8QPFr3y61sYLu7sIMVt39+Zl6OSxvOOX14QIM/mrNrzjjRI8EXGvYgES5gSO4on 6NLS6/64c1oQThDHCsxusKoSWLZ9KqP2vRPt7tZjn7DZMzEsuLhlINKBeupcqALX FnOBr768P+if/j0WcDR2pBaMg3ch+XC5sfYav7kapjgWUqCx9BvrHKLXXdlEGUC0 7Ku7PH+nF7CIHiTay+i89odvOd8aLGsa/SUf5XGauKkH65VgQkmksgPeZUPqTnyC MEsyLJLfn4AP3ySwqzfSLac8jqZG8FGBt4DgM2MQBHibctzfeMIznfcfh/A8+Edx jqLKLAs= =4075 -----END PGP SIGNATURE----- Merge tag 'v5.6-rc4' into rdma.git for-next Required due to dependencies in following patches. Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
commit
c13cac2a21
4
.gitignore
vendored
4
.gitignore
vendored
@ -100,6 +100,10 @@ modules.order
|
||||
/include/ksym/
|
||||
/arch/*/include/generated/
|
||||
|
||||
# Generated lkdtm tests
|
||||
/tools/testing/selftests/lkdtm/*.sh
|
||||
!/tools/testing/selftests/lkdtm/run.sh
|
||||
|
||||
# stgit generated dirs
|
||||
patches-*
|
||||
|
||||
|
2
COPYING
2
COPYING
@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see:
|
||||
Documentation/process/license-rules.rst
|
||||
|
||||
for more details.
|
||||
|
||||
All contributions to the Linux Kernel are subject to this COPYING file.
|
||||
|
5
CREDITS
5
CREDITS
@ -567,6 +567,11 @@ D: Original author of Amiga FFS filesystem
|
||||
S: Orlando, Florida
|
||||
S: USA
|
||||
|
||||
N: Paul Burton
|
||||
E: paulburton@kernel.org
|
||||
W: https://pburton.com
|
||||
D: MIPS maintainer 2018-2020
|
||||
|
||||
N: Lennert Buytenhek
|
||||
E: kernel@wantstofly.org
|
||||
D: Original (2.4) rewrite of the ethernet bridging code
|
||||
|
@ -62,6 +62,30 @@ Or more shorter, written as following::
|
||||
In both styles, same key words are automatically merged when parsing it
|
||||
at boot time. So you can append similar trees or key-values.
|
||||
|
||||
Same-key Values
|
||||
---------------
|
||||
|
||||
It is prohibited that two or more values or arrays share a same-key.
|
||||
For example,::
|
||||
|
||||
foo = bar, baz
|
||||
foo = qux # !ERROR! we can not re-define same key
|
||||
|
||||
If you want to append the value to existing key as an array member,
|
||||
you can use ``+=`` operator. For example::
|
||||
|
||||
foo = bar, baz
|
||||
foo += qux
|
||||
|
||||
In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``.
|
||||
|
||||
However, a sub-key and a value can not co-exist under a parent key.
|
||||
For example, following config is NOT allowed.::
|
||||
|
||||
foo = value1
|
||||
foo.bar = value2 # !ERROR! subkey "bar" and value "value1" can NOT co-exist
|
||||
|
||||
|
||||
Comments
|
||||
--------
|
||||
|
||||
@ -102,9 +126,13 @@ Boot Kernel With a Boot Config
|
||||
==============================
|
||||
|
||||
Since the boot configuration file is loaded with initrd, it will be added
|
||||
to the end of the initrd (initramfs) image file. The Linux kernel decodes
|
||||
the last part of the initrd image in memory to get the boot configuration
|
||||
data.
|
||||
to the end of the initrd (initramfs) image file with size, checksum and
|
||||
12-byte magic word as below.
|
||||
|
||||
[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n]
|
||||
|
||||
The Linux kernel decodes the last part of the initrd image in memory to
|
||||
get the boot configuration data.
|
||||
Because of this "piggyback" method, there is no need to change or
|
||||
update the boot loader and the kernel image itself.
|
||||
|
||||
|
@ -136,6 +136,10 @@
|
||||
dynamic table installation which will install SSDT
|
||||
tables to /sys/firmware/acpi/tables/dynamic.
|
||||
|
||||
acpi_no_watchdog [HW,ACPI,WDT]
|
||||
Ignore the ACPI-based watchdog interface (WDAT) and let
|
||||
a native driver control the watchdog device instead.
|
||||
|
||||
acpi_rsdp= [ACPI,EFI,KEXEC]
|
||||
Pass the RSDP address to the kernel, mostly used
|
||||
on machines running EFI runtime service to boot the
|
||||
|
@ -129,7 +129,7 @@ this logic.
|
||||
|
||||
As a single binary will need to support both 48-bit and 52-bit VA
|
||||
spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
|
||||
also must be sized large enought to accommodate a fixed PAGE_OFFSET.
|
||||
also must be sized large enough to accommodate a fixed PAGE_OFFSET.
|
||||
|
||||
Most code in the kernel should not need to consider the VA_BITS, for
|
||||
code that does need to know the VA size the variables are
|
||||
|
@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending
|
||||
how the user addresses are used by the kernel:
|
||||
|
||||
1. User addresses not accessed by the kernel but used for address space
|
||||
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
|
||||
of valid tagged pointers in this context is always allowed.
|
||||
management (e.g. ``mprotect()``, ``madvise()``). The use of valid
|
||||
tagged pointers in this context is allowed with the exception of
|
||||
``brk()``, ``mmap()`` and the ``new_address`` argument to
|
||||
``mremap()`` as these have the potential to alias with existing
|
||||
user addresses.
|
||||
|
||||
NOTE: This behaviour changed in v5.6 and so some earlier kernels may
|
||||
incorrectly accept valid tagged pointers for the ``brk()``,
|
||||
``mmap()`` and ``mremap()`` system calls.
|
||||
|
||||
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
||||
relaxation is disabled by default and the application thread needs to
|
||||
|
@ -551,6 +551,7 @@ options to your ``.config``:
|
||||
Once the kernel is built and installed, a simple
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
modprobe example-test
|
||||
|
||||
...will run the tests.
|
||||
|
@ -43,9 +43,13 @@ properties:
|
||||
- enum:
|
||||
- allwinner,sun8i-h3-tcon-tv
|
||||
- allwinner,sun50i-a64-tcon-tv
|
||||
- allwinner,sun50i-h6-tcon-tv
|
||||
- const: allwinner,sun8i-a83t-tcon-tv
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- allwinner,sun50i-h6-tcon-tv
|
||||
- const: allwinner,sun8i-r40-tcon-tv
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
|
||||
Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller
|
||||
|
||||
Required properties:
|
||||
- compatible:
|
||||
ilitek,ili210x for ILI210x
|
||||
ilitek,ili2117 for ILI2117
|
||||
ilitek,ili2120 for ILI2120
|
||||
ilitek,ili251x for ILI251x
|
||||
|
||||
- reg: The I2C address of the device
|
||||
|
@ -33,24 +33,40 @@ properties:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
minItems: 2
|
||||
maxItems: 3
|
||||
items:
|
||||
- description: The CSI interface clock
|
||||
- description: The CSI ISP clock
|
||||
- description: The CSI DRAM clock
|
||||
oneOf:
|
||||
- items:
|
||||
- description: The CSI interface clock
|
||||
- description: The CSI DRAM clock
|
||||
|
||||
- items:
|
||||
- description: The CSI interface clock
|
||||
- description: The CSI ISP clock
|
||||
- description: The CSI DRAM clock
|
||||
|
||||
clock-names:
|
||||
minItems: 2
|
||||
maxItems: 3
|
||||
items:
|
||||
- const: bus
|
||||
- const: isp
|
||||
- const: ram
|
||||
oneOf:
|
||||
- items:
|
||||
- const: bus
|
||||
- const: ram
|
||||
|
||||
- items:
|
||||
- const: bus
|
||||
- const: isp
|
||||
- const: ram
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
# FIXME: This should be made required eventually once every SoC will
|
||||
# have the MBUS declared.
|
||||
interconnects:
|
||||
maxItems: 1
|
||||
|
||||
# FIXME: This should be made required eventually once every SoC will
|
||||
# have the MBUS declared.
|
||||
interconnect-names:
|
||||
const: dma-mem
|
||||
|
||||
# See ./video-interfaces.txt for details
|
||||
port:
|
||||
type: object
|
||||
|
@ -347,6 +347,7 @@ examples:
|
||||
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
|
||||
|
||||
#iommu-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
};
|
||||
|
||||
external-memory-controller@7001b000 {
|
||||
@ -363,20 +364,23 @@ examples:
|
||||
timing-0 {
|
||||
clock-frequency = <12750000>;
|
||||
|
||||
nvidia,emc-zcal-cnt-long = <0x00000042>;
|
||||
nvidia,emc-auto-cal-interval = <0x001fffff>;
|
||||
nvidia,emc-ctt-term-ctrl = <0x00000802>;
|
||||
nvidia,emc-cfg = <0x73240000>;
|
||||
nvidia,emc-cfg-2 = <0x000008c5>;
|
||||
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
|
||||
nvidia,emc-bgbias-ctl0 = <0x00000008>;
|
||||
nvidia,emc-auto-cal-config = <0xa1430000>;
|
||||
nvidia,emc-auto-cal-config2 = <0x00000000>;
|
||||
nvidia,emc-auto-cal-config3 = <0x00000000>;
|
||||
nvidia,emc-mode-reset = <0x80001221>;
|
||||
nvidia,emc-auto-cal-interval = <0x001fffff>;
|
||||
nvidia,emc-bgbias-ctl0 = <0x00000008>;
|
||||
nvidia,emc-cfg = <0x73240000>;
|
||||
nvidia,emc-cfg-2 = <0x000008c5>;
|
||||
nvidia,emc-ctt-term-ctrl = <0x00000802>;
|
||||
nvidia,emc-mode-1 = <0x80100003>;
|
||||
nvidia,emc-mode-2 = <0x80200008>;
|
||||
nvidia,emc-mode-4 = <0x00000000>;
|
||||
nvidia,emc-mode-reset = <0x80001221>;
|
||||
nvidia,emc-mrs-wait-cnt = <0x000e000e>;
|
||||
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
|
||||
nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
|
||||
nvidia,emc-zcal-cnt-long = <0x00000042>;
|
||||
nvidia,emc-zcal-interval = <0x00000000>;
|
||||
|
||||
nvidia,emc-configuration = <
|
||||
0x00000000 /* EMC_RC */
|
||||
|
@ -124,7 +124,7 @@ not every application needs SDIO irq, e.g. MMC cards.
|
||||
pinctrl-1 = <&mmc1_idle>;
|
||||
pinctrl-2 = <&mmc1_sleep>;
|
||||
...
|
||||
interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>;
|
||||
interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>;
|
||||
};
|
||||
|
||||
mmc1_idle : pinmux_cirq_pin {
|
||||
|
@ -56,7 +56,6 @@ patternProperties:
|
||||
examples:
|
||||
- |
|
||||
davinci_mdio: mdio@5c030000 {
|
||||
compatible = "ti,davinci_mdio";
|
||||
reg = <0x5c030000 0x1000>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
@ -71,9 +71,13 @@ b) Example for device tree::
|
||||
ipmb@10 {
|
||||
compatible = "ipmb-dev";
|
||||
reg = <0x10>;
|
||||
i2c-protocol;
|
||||
};
|
||||
};
|
||||
|
||||
If xmit of data to be done using raw i2c block vs smbus
|
||||
then "i2c-protocol" needs to be defined as above.
|
||||
|
||||
2) Manually from Linux::
|
||||
|
||||
modprobe ipmb-dev-int
|
||||
|
@ -134,7 +134,7 @@ Sequential zone files can only be written sequentially, starting from the file
|
||||
end, that is, write operations can only be append writes. Zonefs makes no
|
||||
attempt at accepting random writes and will fail any write request that has a
|
||||
start offset not corresponding to the end of the file, or to the end of the last
|
||||
write issued and still in-flight (for asynchrnous I/O operations).
|
||||
write issued and still in-flight (for asynchronous I/O operations).
|
||||
|
||||
Since dirty page writeback by the page cache does not guarantee a sequential
|
||||
write pattern, zonefs prevents buffered writes and writeable shared mappings
|
||||
@ -142,7 +142,7 @@ on sequential files. Only direct I/O writes are accepted for these files.
|
||||
zonefs relies on the sequential delivery of write I/O requests to the device
|
||||
implemented by the block layer elevator. An elevator implementing the sequential
|
||||
write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature)
|
||||
must be used. This type of elevator (e.g. mq-deadline) is the set by default
|
||||
must be used. This type of elevator (e.g. mq-deadline) is set by default
|
||||
for zoned block devices on device initialization.
|
||||
|
||||
There are no restrictions on the type of I/O used for read operations in
|
||||
@ -196,7 +196,7 @@ additional conditions that result in I/O errors.
|
||||
may still happen in the case of a partial failure of a very large direct I/O
|
||||
operation split into multiple BIOs/requests or asynchronous I/O operations.
|
||||
If one of the write request within the set of sequential write requests
|
||||
issued to the device fails, all write requests after queued after it will
|
||||
issued to the device fails, all write requests queued after it will
|
||||
become unaligned and fail.
|
||||
|
||||
* Delayed write errors: similarly to regular block devices, if the device side
|
||||
@ -207,7 +207,7 @@ additional conditions that result in I/O errors.
|
||||
causing all data to be dropped after the sector that caused the error.
|
||||
|
||||
All I/O errors detected by zonefs are notified to the user with an error code
|
||||
return for the system call that trigered or detected the error. The recovery
|
||||
return for the system call that triggered or detected the error. The recovery
|
||||
actions taken by zonefs in response to I/O errors depend on the I/O type (read
|
||||
vs write) and on the reason for the error (bad sector, unaligned writes or zone
|
||||
condition change).
|
||||
@ -222,7 +222,7 @@ condition change).
|
||||
* A zone condition change to read-only or offline also always triggers zonefs
|
||||
I/O error recovery.
|
||||
|
||||
Zonefs minimal I/O error recovery may change a file size and a file access
|
||||
Zonefs minimal I/O error recovery may change a file size and file access
|
||||
permissions.
|
||||
|
||||
* File size changes:
|
||||
@ -237,7 +237,7 @@ permissions.
|
||||
A file size may also be reduced to reflect a delayed write error detected on
|
||||
fsync(): in this case, the amount of data effectively written in the zone may
|
||||
be less than originally indicated by the file inode size. After such I/O
|
||||
error, zonefs always fixes a file inode size to reflect the amount of data
|
||||
error, zonefs always fixes the file inode size to reflect the amount of data
|
||||
persistently stored in the file zone.
|
||||
|
||||
* Access permission changes:
|
||||
@ -281,11 +281,11 @@ Further notes:
|
||||
permissions to read-only applies to all files. The file system is remounted
|
||||
read-only.
|
||||
* Access permission and file size changes due to the device transitioning zones
|
||||
to the offline condition are permanent. Remounting or reformating the device
|
||||
to the offline condition are permanent. Remounting or reformatting the device
|
||||
with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good
|
||||
state.
|
||||
* File access permission changes to read-only due to the device transitioning
|
||||
zones to the read-only condition are permanent. Remounting or reformating
|
||||
zones to the read-only condition are permanent. Remounting or reformatting
|
||||
the device will not re-enable file write access.
|
||||
* File access permission changes implied by the remount-ro, zone-ro and
|
||||
zone-offline mount options are temporary for zones in a good condition.
|
||||
@ -301,13 +301,13 @@ Mount options
|
||||
|
||||
zonefs define the "errors=<behavior>" mount option to allow the user to specify
|
||||
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
|
||||
condition chages. The defined behaviors are as follow:
|
||||
condition changes. The defined behaviors are as follow:
|
||||
* remount-ro (default)
|
||||
* zone-ro
|
||||
* zone-offline
|
||||
* repair
|
||||
|
||||
The I/O error actions defined for each behavior is detailed in the previous
|
||||
The I/O error actions defined for each behavior are detailed in the previous
|
||||
section.
|
||||
|
||||
Zonefs User Space Tools
|
||||
|
@ -24,6 +24,7 @@ This driver implements support for Infineon Multi-phase XDPE122 family
|
||||
dual loop voltage regulators.
|
||||
The family includes XDPE12284 and XDPE12254 devices.
|
||||
The devices from this family complaint with:
|
||||
|
||||
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC
|
||||
converter specification.
|
||||
- Intel SVID rev 1.9. protocol.
|
||||
|
@ -765,7 +765,7 @@ is not sufficient this sometimes needs to be explicit.
|
||||
Example::
|
||||
|
||||
#arch/x86/boot/Makefile
|
||||
subdir- := compressed/
|
||||
subdir- := compressed
|
||||
|
||||
The above assignment instructs kbuild to descend down in the
|
||||
directory compressed/ when "make clean" is executed.
|
||||
@ -1379,9 +1379,6 @@ See subsequent chapter for the syntax of the Kbuild file.
|
||||
in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
|
||||
a wrapper of the asm-generic one.
|
||||
|
||||
The convention is to list one subdir per line and
|
||||
preferably in alphabetic order.
|
||||
|
||||
8 Kbuild Variables
|
||||
==================
|
||||
|
||||
|
@ -487,8 +487,9 @@ phy_register_fixup_for_id()::
|
||||
The stubs set one of the two matching criteria, and set the other one to
|
||||
match anything.
|
||||
|
||||
When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module,
|
||||
unregister fixup and free allocate memory are required.
|
||||
When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module load
|
||||
time, the module needs to unregister the fixup and free allocated memory when
|
||||
it's unloaded.
|
||||
|
||||
Call one of following function before unloading module::
|
||||
|
||||
|
@ -13,7 +13,6 @@ Power Management
|
||||
drivers-testing
|
||||
energy-model
|
||||
freezing-of-tasks
|
||||
interface
|
||||
opp
|
||||
pci
|
||||
pm_qos_interface
|
||||
|
@ -244,23 +244,23 @@ disclosure of a particular issue, unless requested by a response team or by
|
||||
an involved disclosed party. The current ambassadors list:
|
||||
|
||||
============= ========================================================
|
||||
ARM
|
||||
ARM Grant Likely <grant.likely@arm.com>
|
||||
AMD Tom Lendacky <tom.lendacky@amd.com>
|
||||
IBM
|
||||
Intel Tony Luck <tony.luck@intel.com>
|
||||
Qualcomm Trilok Soni <tsoni@codeaurora.org>
|
||||
|
||||
Microsoft Sasha Levin <sashal@kernel.org>
|
||||
Microsoft James Morris <jamorris@linux.microsoft.com>
|
||||
VMware
|
||||
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
Canonical Tyler Hicks <tyhicks@canonical.com>
|
||||
Canonical John Johansen <john.johansen@canonical.com>
|
||||
Debian Ben Hutchings <ben@decadent.org.uk>
|
||||
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
SUSE Jiri Kosina <jkosina@suse.cz>
|
||||
|
||||
Amazon Peter Bowen <pzb@amzn.com>
|
||||
Amazon
|
||||
Google Kees Cook <keescook@chromium.org>
|
||||
============= ========================================================
|
||||
|
||||
|
@ -30,4 +30,4 @@ if [ -n "$parallel" ] ; then
|
||||
parallel="-j$parallel"
|
||||
fi
|
||||
|
||||
exec "$sphinx" "$parallel" "$@"
|
||||
exec "$sphinx" $parallel "$@"
|
||||
|
@ -183,7 +183,7 @@ CVE分配
|
||||
VMware
|
||||
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
Canonical Tyler Hicks <tyhicks@canonical.com>
|
||||
Canonical John Johansen <john.johansen@canonical.com>
|
||||
Debian Ben Hutchings <ben@decadent.org.uk>
|
||||
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
|
@ -1,9 +1,11 @@
|
||||
==================
|
||||
Guest halt polling
|
||||
==================
|
||||
|
||||
The cpuidle_haltpoll driver, with the haltpoll governor, allows
|
||||
the guest vcpus to poll for a specified amount of time before
|
||||
halting.
|
||||
|
||||
This provides the following benefits to host side polling:
|
||||
|
||||
1) The POLL flag is set while polling is performed, which allows
|
||||
@ -29,18 +31,21 @@ Module Parameters
|
||||
The haltpoll governor has 5 tunable module parameters:
|
||||
|
||||
1) guest_halt_poll_ns:
|
||||
|
||||
Maximum amount of time, in nanoseconds, that polling is
|
||||
performed before halting.
|
||||
|
||||
Default: 200000
|
||||
|
||||
2) guest_halt_poll_shrink:
|
||||
|
||||
Division factor used to shrink per-cpu guest_halt_poll_ns when
|
||||
wakeup event occurs after the global guest_halt_poll_ns.
|
||||
|
||||
Default: 2
|
||||
|
||||
3) guest_halt_poll_grow:
|
||||
|
||||
Multiplication factor used to grow per-cpu guest_halt_poll_ns
|
||||
when event occurs after per-cpu guest_halt_poll_ns
|
||||
but before global guest_halt_poll_ns.
|
||||
@ -48,6 +53,7 @@ but before global guest_halt_poll_ns.
|
||||
Default: 2
|
||||
|
||||
4) guest_halt_poll_grow_start:
|
||||
|
||||
The per-cpu guest_halt_poll_ns eventually reaches zero
|
||||
in case of an idle system. This value sets the initial
|
||||
per-cpu guest_halt_poll_ns when growing. This can
|
||||
@ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).
|
||||
|
||||
Default: Y
|
||||
|
||||
The module parameters can be set from the debugfs files in:
|
||||
The module parameters can be set from the debugfs files in::
|
||||
|
||||
/sys/module/haltpoll/parameters/
|
||||
|
||||
@ -74,5 +80,5 @@ Further Notes
|
||||
=============
|
||||
|
||||
- Care should be taken when setting the guest_halt_poll_ns parameter as a
|
||||
large value has the potential to drive the cpu usage to 100% on a machine which
|
||||
would be almost entirely idle otherwise.
|
||||
large value has the potential to drive the cpu usage to 100% on a machine
|
||||
which would be almost entirely idle otherwise.
|
@ -8,7 +8,9 @@ Linux Virtualization Support
|
||||
:maxdepth: 2
|
||||
|
||||
kvm/index
|
||||
uml/user_mode_linux
|
||||
paravirt_ops
|
||||
guest-halt-polling
|
||||
|
||||
.. only:: html and subproject
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,8 @@
|
||||
* Internal ABI between the kernel and HYP
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======================================
|
||||
Internal ABI between the kernel and HYP
|
||||
=======================================
|
||||
|
||||
This file documents the interaction between the Linux kernel and the
|
||||
hypervisor layer when running Linux as a hypervisor (for example
|
||||
@ -19,25 +23,31 @@ and only act on individual CPUs.
|
||||
Unless specified otherwise, any built-in hypervisor must implement
|
||||
these functions (see arch/arm{,64}/include/asm/virt.h):
|
||||
|
||||
* r0/x0 = HVC_SET_VECTORS
|
||||
r1/x1 = vectors
|
||||
* ::
|
||||
|
||||
r0/x0 = HVC_SET_VECTORS
|
||||
r1/x1 = vectors
|
||||
|
||||
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
|
||||
must be a physical address, and respect the alignment requirements
|
||||
of the architecture. Only implemented by the initial stubs, not by
|
||||
Linux hypervisors.
|
||||
|
||||
* r0/x0 = HVC_RESET_VECTORS
|
||||
* ::
|
||||
|
||||
r0/x0 = HVC_RESET_VECTORS
|
||||
|
||||
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initials
|
||||
stubs' exception vector value. This effectively disables an existing
|
||||
hypervisor.
|
||||
|
||||
* r0/x0 = HVC_SOFT_RESTART
|
||||
r1/x1 = restart address
|
||||
x2 = x0's value when entering the next payload (arm64)
|
||||
x3 = x1's value when entering the next payload (arm64)
|
||||
x4 = x2's value when entering the next payload (arm64)
|
||||
* ::
|
||||
|
||||
r0/x0 = HVC_SOFT_RESTART
|
||||
r1/x1 = restart address
|
||||
x2 = x0's value when entering the next payload (arm64)
|
||||
x3 = x1's value when entering the next payload (arm64)
|
||||
x4 = x2's value when entering the next payload (arm64)
|
||||
|
||||
Mask all exceptions, disable the MMU, move the arguments into place
|
||||
(arm64 only), and jump to the restart address while at HYP/EL2. This
|
12
Documentation/virt/kvm/arm/index.rst
Normal file
12
Documentation/virt/kvm/arm/index.rst
Normal file
@ -0,0 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===
|
||||
ARM
|
||||
===
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
hyp-abi
|
||||
psci
|
||||
pvtime
|
@ -1,3 +1,9 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================================
|
||||
Power State Coordination Interface (PSCI)
|
||||
=========================================
|
||||
|
||||
KVM implements the PSCI (Power State Coordination Interface)
|
||||
specification in order to provide services such as CPU on/off, reset
|
||||
and power-off to the guest.
|
||||
@ -30,32 +36,42 @@ The following register is defined:
|
||||
- Affects the whole VM (even if the register view is per-vcpu)
|
||||
|
||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
|
||||
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_1 in [1].
|
||||
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_1 in [1].
|
||||
|
||||
Accepted values are:
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
|
||||
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
|
||||
KVM does not offer
|
||||
firmware support for the workaround. The mitigation status for the
|
||||
guest is unknown.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
|
||||
The workaround HVC call is
|
||||
available to the guest and required for the mitigation.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
|
||||
The workaround HVC call
|
||||
is available to the guest, but it is not needed on this VCPU.
|
||||
|
||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
|
||||
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_2 in [1].
|
||||
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
||||
offered by KVM to the guest via a HVC call. The workaround is described
|
||||
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
|
||||
|
||||
Accepted values are:
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
|
||||
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
|
||||
A workaround is not
|
||||
available. KVM does not offer firmware support for the workaround.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
|
||||
The workaround state is
|
||||
unknown. KVM does not offer firmware support for the workaround.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
|
||||
The workaround is available,
|
||||
and can be disabled by a vCPU. If
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
|
||||
this vCPU.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
|
||||
always active on this vCPU or it is not needed.
|
||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
|
||||
The workaround is always active on this vCPU or it is not needed.
|
||||
|
||||
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
||||
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============================================
|
||||
ARM Virtual Interrupt Translation Service (ITS)
|
||||
===============================================
|
||||
|
||||
@ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have
|
||||
a separate, non-overlapping MMIO region.
|
||||
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Groups
|
||||
======
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
-------------------------
|
||||
|
||||
Attributes:
|
||||
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GICv3 ITS
|
||||
control register frame.
|
||||
This address needs to be 64K aligned and the region covers 128K.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address
|
||||
-EEXIST: Address already configured
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENODEV: Incorrect attribute or the ITS is not supported.
|
||||
|
||||
======= =================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address
|
||||
-EEXIST Address already configured
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENODEV Incorrect attribute or the ITS is not supported.
|
||||
======= =================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
-------------------------
|
||||
|
||||
Attributes:
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the ITS, no additional parameter in
|
||||
@ -58,16 +71,21 @@ Groups:
|
||||
"ITS Restore Sequence".
|
||||
|
||||
Errors:
|
||||
-ENXIO: ITS not properly configured as required prior to setting
|
||||
this attribute
|
||||
-ENOMEM: Memory shortage when allocating ITS internal data
|
||||
-EINVAL: Inconsistent restored data
|
||||
-EFAULT: Invalid guest ram access
|
||||
-EBUSY: One or more VCPUS are running
|
||||
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||
state is not available
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||
======= ==========================================================
|
||||
-ENXIO ITS not properly configured as required prior to setting
|
||||
this attribute
|
||||
-ENOMEM Memory shortage when allocating ITS internal data
|
||||
-EINVAL Inconsistent restored data
|
||||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||
state is not available
|
||||
======= ==========================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||
-----------------------------
|
||||
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes the offset of the
|
||||
ITS register, relative to the ITS control frame base address
|
||||
@ -78,6 +96,7 @@ Groups:
|
||||
be accessed with full length.
|
||||
|
||||
Writes to read-only registers are ignored by the kernel except for:
|
||||
|
||||
- GITS_CREADR. It must be restored otherwise commands in the queue
|
||||
will be re-executed after restoring CWRITER. GITS_CREADR must be
|
||||
restored before restoring the GITS_CTLR which is likely to enable the
|
||||
@ -91,30 +110,36 @@ Groups:
|
||||
|
||||
For other registers, getting or setting a register has the same
|
||||
effect as reading/writing the register on real hardware.
|
||||
Errors:
|
||||
-ENXIO: Offset does not correspond to any supported register
|
||||
-EFAULT: Invalid user pointer for attr->addr
|
||||
-EINVAL: Offset is not 64-bit aligned
|
||||
-EBUSY: one or more VCPUS are running
|
||||
|
||||
ITS Restore Sequence:
|
||||
-------------------------
|
||||
Errors:
|
||||
|
||||
======= ====================================================
|
||||
-ENXIO Offset does not correspond to any supported register
|
||||
-EFAULT Invalid user pointer for attr->addr
|
||||
-EINVAL Offset is not 64-bit aligned
|
||||
-EBUSY one or more VCPUS are running
|
||||
======= ====================================================
|
||||
|
||||
ITS Restore Sequence:
|
||||
---------------------
|
||||
|
||||
The following ordering must be followed when restoring the GIC and the ITS:
|
||||
|
||||
a) restore all guest memory and create vcpus
|
||||
b) restore all redistributors
|
||||
c) provide the ITS base address
|
||||
(KVM_DEV_ARM_VGIC_GRP_ADDR)
|
||||
d) restore the ITS in the following order:
|
||||
1. Restore GITS_CBASER
|
||||
2. Restore all other GITS_ registers, except GITS_CTLR!
|
||||
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
||||
4. Restore GITS_CTLR
|
||||
|
||||
1. Restore GITS_CBASER
|
||||
2. Restore all other ``GITS_`` registers, except GITS_CTLR!
|
||||
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
||||
4. Restore GITS_CTLR
|
||||
|
||||
Then vcpus can be started.
|
||||
|
||||
ITS Table ABI REV0:
|
||||
-------------------
|
||||
ITS Table ABI REV0:
|
||||
-------------------
|
||||
|
||||
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
|
||||
not support a virtual GICv4 with support for direct injection of virtual
|
||||
@ -125,12 +150,13 @@ Then vcpus can be started.
|
||||
entries in the collection are listed in no particular order.
|
||||
All entries are 8 bytes.
|
||||
|
||||
Device Table Entry (DTE):
|
||||
Device Table Entry (DTE)::
|
||||
|
||||
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
||||
values: | V | next | ITT_addr | Size |
|
||||
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
||||
values: | V | next | ITT_addr | Size |
|
||||
|
||||
where:
|
||||
|
||||
where;
|
||||
- V indicates whether the entry is valid. If not, other fields
|
||||
are not meaningful.
|
||||
- next: equals to 0 if this entry is the last one; otherwise it
|
||||
@ -140,32 +166,34 @@ Then vcpus can be started.
|
||||
- Size specifies the supported number of bits for the EventID,
|
||||
minus one
|
||||
|
||||
Collection Table Entry (CTE):
|
||||
Collection Table Entry (CTE)::
|
||||
|
||||
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
||||
values: | V | RES0 | RDBase | ICID |
|
||||
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
||||
values: | V | RES0 | RDBase | ICID |
|
||||
|
||||
where:
|
||||
|
||||
- V indicates whether the entry is valid. If not, other fields are
|
||||
not meaningful.
|
||||
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
|
||||
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
|
||||
- ICID is the collection ID
|
||||
|
||||
Interrupt Translation Entry (ITE):
|
||||
Interrupt Translation Entry (ITE)::
|
||||
|
||||
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
||||
values: | next | pINTID | ICID |
|
||||
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
||||
values: | next | pINTID | ICID |
|
||||
|
||||
where:
|
||||
|
||||
- next: equals to 0 if this entry is the last one; otherwise it corresponds
|
||||
to the EventID offset to the next ITE capped by 2^16 -1.
|
||||
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
|
||||
and other fields are not meaningful.
|
||||
- ICID is the collection ID
|
||||
|
||||
ITS Reset State:
|
||||
----------------
|
||||
ITS Reset State:
|
||||
----------------
|
||||
|
||||
RESET returns the ITS to the same state that it was when first created and
|
||||
initialized. When the RESET command returns, the following things are
|
@ -1,9 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================================================
|
||||
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
|
||||
==============================================================
|
||||
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
||||
- KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
||||
|
||||
Only one VGIC instance may be instantiated through this API. The created VGIC
|
||||
will act as the VM interrupt controller, requiring emulated user-space devices
|
||||
@ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GICv3 distributor
|
||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||
@ -29,21 +33,25 @@ Groups:
|
||||
This address needs to be 64K aligned.
|
||||
|
||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
|
||||
The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
|
||||
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
||||
values: | count | base | flags | index
|
||||
The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
|
||||
|
||||
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
||||
values: | count | base | flags | index
|
||||
|
||||
- index encodes the unique redistributor region index
|
||||
- flags: reserved for future use, currently 0
|
||||
- base field encodes bits [51:16] of the guest physical base address
|
||||
of the first redistributor in the region.
|
||||
- count encodes the number of redistributors in the region. Must be
|
||||
greater than 0.
|
||||
|
||||
There are two 64K pages for each redistributor in the region and
|
||||
redistributors are laid out contiguously within the region. Regions
|
||||
are filled with redistributors in the index order. The sum of all
|
||||
region count fields must be greater than or equal to the number of
|
||||
VCPUs. Redistributor regions must be registered in the incremental
|
||||
index order, starting from index 0.
|
||||
|
||||
The characteristics of a specific redistributor region can be read
|
||||
by presetting the index field in the attr data.
|
||||
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||
@ -52,23 +60,27 @@ Groups:
|
||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address, bad redistributor region
|
||||
|
||||
======= =============================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address, bad redistributor region
|
||||
count/index, mixed redistributor region attribute usage
|
||||
-EEXIST: Address already configured
|
||||
-ENOENT: Attempt to read the characteristics of a non existing
|
||||
-EEXIST Address already configured
|
||||
-ENOENT Attempt to read the characteristics of a non existing
|
||||
redistributor region
|
||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
||||
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||
or hardware support is missing.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
======= =============================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
||||
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | mpidr | offset |
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | mpidr | offset |
|
||||
|
||||
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
|
||||
__u32 value. 64-bit registers must be accessed by separately accessing the
|
||||
@ -93,7 +105,8 @@ Groups:
|
||||
redistributor is accessed. The mpidr is ignored for the distributor.
|
||||
|
||||
The mpidr encoding is based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
@ -148,24 +161,30 @@ Groups:
|
||||
ignored.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
|
||||
====== =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
====== =====================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
||||
values: | mpidr | RES | instr |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
||||
values: | mpidr | RES | instr |
|
||||
|
||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
The instr field encodes the system register to access based on the fields
|
||||
defined in the A64 instruction set encoding for system register access
|
||||
(RES means the bits are reserved for future use and should be zero):
|
||||
(RES means the bits are reserved for future use and should be zero)::
|
||||
|
||||
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
|
||||
| Op 0 | Op1 | CRn | CRm | Op2 |
|
||||
@ -178,26 +197,35 @@ Groups:
|
||||
|
||||
CPU interface registers access is not implemented for AArch32 mode.
|
||||
Error -ENXIO is returned when accessed in AArch32 mode.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: VCPU is running
|
||||
-EINVAL: Invalid mpidr or register value supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY VCPU is running
|
||||
-EINVAL Invalid mpidr or register value supplied
|
||||
======= =====================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||
|
||||
kvm_device_attr.addr points to a __u32 value.
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value set is out of the expected range
|
||||
-EBUSY: Value has already be set.
|
||||
|
||||
======= ======================================
|
||||
-EINVAL Value set is out of the expected range
|
||||
-EBUSY Value has already be set.
|
||||
======= ======================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC, no additional parameter in
|
||||
kvm_device_attr.addr.
|
||||
@ -205,20 +233,26 @@ Groups:
|
||||
save all LPI pending bits into guest RAM pending tables.
|
||||
|
||||
The first kB of the pending table is not altered by this operation.
|
||||
|
||||
Errors:
|
||||
-ENXIO: VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV: no online VCPU
|
||||
-ENOMEM: memory shortage when allocating vgic internal data
|
||||
-EFAULT: Invalid guest ram access
|
||||
-EBUSY: One or more VCPUS are running
|
||||
|
||||
======= ========================================================
|
||||
-ENXIO VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV no online VCPU
|
||||
-ENOMEM memory shortage when allocating vgic internal data
|
||||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
======= ========================================================
|
||||
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes the following values:
|
||||
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
||||
values: | mpidr | info | vINTID |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes the following values::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
||||
values: | mpidr | info | vINTID |
|
||||
|
||||
The vINTID specifies which set of IRQs is reported on.
|
||||
|
||||
@ -228,6 +262,7 @@ Groups:
|
||||
VGIC_LEVEL_INFO_LINE_LEVEL:
|
||||
Get/Set the input level of the IRQ line for a set of 32 contiguously
|
||||
numbered interrupts.
|
||||
|
||||
vINTID must be a multiple of 32.
|
||||
|
||||
kvm_device_attr.addr points to a __u32 value which will contain a
|
||||
@ -243,9 +278,14 @@ Groups:
|
||||
reported with the same value regardless of the mpidr specified.
|
||||
|
||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||
architecture defined MPIDR, and the field is encoded as follows:
|
||||
architecture defined MPIDR, and the field is encoded as follows::
|
||||
|
||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||
|
||||
Errors:
|
||||
-EINVAL: vINTID is not multiple of 32 or
|
||||
info field is not VGIC_LEVEL_INFO_LINE_LEVEL
|
||||
|
||||
======= =============================================
|
||||
-EINVAL vINTID is not multiple of 32 or info field is
|
||||
not VGIC_LEVEL_INFO_LINE_LEVEL
|
||||
======= =============================================
|
@ -1,8 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================================================
|
||||
ARM Virtual Generic Interrupt Controller v2 (VGIC)
|
||||
==================================================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
||||
|
||||
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
||||
|
||||
Only one VGIC instance may be instantiated through either this API or the
|
||||
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
|
||||
@ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
|
||||
Base address in the guest physical address space of the GIC distributor
|
||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
@ -27,19 +32,25 @@ Groups:
|
||||
Base address in the guest physical address space of the GIC virtual cpu
|
||||
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
This address needs to be 4K aligned and the region covers 4 KByte.
|
||||
|
||||
Errors:
|
||||
-E2BIG: Address outside of addressable IPA range
|
||||
-EINVAL: Incorrectly aligned address
|
||||
-EEXIST: Address already configured
|
||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
||||
|
||||
======= =============================================================
|
||||
-E2BIG Address outside of addressable IPA range
|
||||
-EINVAL Incorrectly aligned address
|
||||
-EEXIST Address already configured
|
||||
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||
or hardware support is missing.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
======= =============================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
|
||||
All distributor regs are (rw, 32-bit)
|
||||
|
||||
@ -58,16 +69,22 @@ Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
|
||||
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
|
||||
to the interrupt group registers (GICD_IGROUPR) are ignored.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
-EINVAL: Invalid vcpu_index supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
-EINVAL Invalid vcpu_index supplied
|
||||
======= =====================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
|
||||
Attributes:
|
||||
The attr field of kvm_device_attr encodes two values:
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
Attributes:
|
||||
|
||||
The attr field of kvm_device_attr encodes two values::
|
||||
|
||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||
values: | reserved | vcpu_index | offset |
|
||||
|
||||
All CPU interface regs are (rw, 32-bit)
|
||||
|
||||
@ -101,27 +118,39 @@ Groups:
|
||||
value left by 3 places to obtain the actual priority mask level.
|
||||
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
-EINVAL: Invalid vcpu_index supplied
|
||||
|
||||
======= =====================================================
|
||||
-ENXIO Getting or setting this register is not yet supported
|
||||
-EBUSY One or more VCPUs are running
|
||||
-EINVAL Invalid vcpu_index supplied
|
||||
======= =====================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value set is out of the expected range
|
||||
-EBUSY: Value has already be set, or GIC has already been initialized
|
||||
with default values.
|
||||
|
||||
======= =============================================================
|
||||
-EINVAL Value set is out of the expected range
|
||||
-EBUSY Value has already be set, or GIC has already been initialized
|
||||
with default values.
|
||||
======= =============================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC or ITS, no additional parameter
|
||||
in kvm_device_attr.addr.
|
||||
|
||||
Errors:
|
||||
-ENXIO: VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV: no online VCPU
|
||||
-ENOMEM: memory shortage when allocating vgic internal data
|
||||
|
||||
======= =========================================================
|
||||
-ENXIO VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV no online VCPU
|
||||
-ENOMEM memory shortage when allocating vgic internal data
|
||||
======= =========================================================
|
19
Documentation/virt/kvm/devices/index.rst
Normal file
19
Documentation/virt/kvm/devices/index.rst
Normal file
@ -0,0 +1,19 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======
|
||||
Devices
|
||||
=======
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
arm-vgic-its
|
||||
arm-vgic
|
||||
arm-vgic-v3
|
||||
mpic
|
||||
s390_flic
|
||||
vcpu
|
||||
vfio
|
||||
vm
|
||||
xics
|
||||
xive
|
@ -1,9 +1,13 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================
|
||||
MPIC interrupt controller
|
||||
=========================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
||||
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
||||
|
||||
- KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
||||
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
||||
|
||||
Only one MPIC instance, of any type, may be instantiated. The created
|
||||
MPIC will act as the system interrupt controller, connecting to each
|
||||
@ -11,7 +15,8 @@ vcpu's interrupt inputs.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_MPIC_GRP_MISC
|
||||
Attributes:
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
|
||||
Base address of the 256 KiB MPIC register space. Must be
|
||||
naturally aligned. A value of zero disables the mapping.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================================
|
||||
FLIC (floating interrupt controller)
|
||||
====================================
|
||||
|
||||
@ -31,8 +34,10 @@ Groups:
|
||||
Copies all floating interrupts into a buffer provided by userspace.
|
||||
When the buffer is too small it returns -ENOMEM, which is the indication
|
||||
for userspace to try again with a bigger buffer.
|
||||
|
||||
-ENOBUFS is returned when the allocation of a kernelspace buffer has
|
||||
failed.
|
||||
|
||||
-EFAULT is returned when copying data to userspace failed.
|
||||
All interrupts remain pending, i.e. are not deleted from the list of
|
||||
currently pending interrupts.
|
||||
@ -60,38 +65,41 @@ Groups:
|
||||
|
||||
KVM_DEV_FLIC_ADAPTER_REGISTER
|
||||
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
|
||||
describing the adapter to register:
|
||||
describing the adapter to register::
|
||||
|
||||
struct kvm_s390_io_adapter {
|
||||
__u32 id;
|
||||
__u8 isc;
|
||||
__u8 maskable;
|
||||
__u8 swap;
|
||||
__u8 flags;
|
||||
};
|
||||
struct kvm_s390_io_adapter {
|
||||
__u32 id;
|
||||
__u8 isc;
|
||||
__u8 maskable;
|
||||
__u8 swap;
|
||||
__u8 flags;
|
||||
};
|
||||
|
||||
id contains the unique id for the adapter, isc the I/O interruption subclass
|
||||
to use, maskable whether this adapter may be masked (interrupts turned off),
|
||||
swap whether the indicators need to be byte swapped, and flags contains
|
||||
further characteristics of the adapter.
|
||||
|
||||
Currently defined values for 'flags' are:
|
||||
|
||||
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
|
||||
(adapter-interrupt-suppression) facility. This flag only has an effect if
|
||||
the AIS capability is enabled.
|
||||
|
||||
Unknown flag values are ignored.
|
||||
|
||||
|
||||
KVM_DEV_FLIC_ADAPTER_MODIFY
|
||||
Modifies attributes of an existing I/O adapter interrupt source. Takes
|
||||
a kvm_s390_io_adapter_req specifying the adapter and the operation:
|
||||
a kvm_s390_io_adapter_req specifying the adapter and the operation::
|
||||
|
||||
struct kvm_s390_io_adapter_req {
|
||||
__u32 id;
|
||||
__u8 type;
|
||||
__u8 mask;
|
||||
__u16 pad0;
|
||||
__u64 addr;
|
||||
};
|
||||
struct kvm_s390_io_adapter_req {
|
||||
__u32 id;
|
||||
__u8 type;
|
||||
__u8 mask;
|
||||
__u16 pad0;
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
id specifies the adapter and type the operation. The supported operations
|
||||
are:
|
||||
@ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req {
|
||||
perform a gmap translation for the guest address provided in addr,
|
||||
pin a userspace page for the translated address and add it to the
|
||||
list of mappings
|
||||
Note: A new mapping will be created unconditionally; therefore,
|
||||
the calling code should avoid making duplicate mappings.
|
||||
|
||||
.. note:: A new mapping will be created unconditionally; therefore,
|
||||
the calling code should avoid making duplicate mappings.
|
||||
|
||||
KVM_S390_IO_ADAPTER_UNMAP
|
||||
release a userspace page for the translated address specified in addr
|
||||
@ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req {
|
||||
|
||||
KVM_DEV_FLIC_AISM
|
||||
modify the adapter-interruption-suppression mode for a given isc if the
|
||||
AIS capability is enabled. Takes a kvm_s390_ais_req describing:
|
||||
AIS capability is enabled. Takes a kvm_s390_ais_req describing::
|
||||
|
||||
struct kvm_s390_ais_req {
|
||||
__u8 isc;
|
||||
__u16 mode;
|
||||
};
|
||||
struct kvm_s390_ais_req {
|
||||
__u8 isc;
|
||||
__u16 mode;
|
||||
};
|
||||
|
||||
isc contains the target I/O interruption subclass, mode the target
|
||||
adapter-interruption-suppression mode. The following modes are
|
||||
currently supported:
|
||||
|
||||
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
|
||||
is always allowed;
|
||||
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
|
||||
@ -139,12 +149,12 @@ struct kvm_s390_ais_req {
|
||||
|
||||
KVM_DEV_FLIC_AISM_ALL
|
||||
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
|
||||
a kvm_s390_ais_all describing:
|
||||
a kvm_s390_ais_all describing::
|
||||
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm; /* Single-Interruption-Mode mask */
|
||||
__u8 nimm; /* No-Interruption-Mode mask *
|
||||
};
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm; /* Single-Interruption-Mode mask */
|
||||
__u8 nimm; /* No-Interruption-Mode mask *
|
||||
};
|
||||
|
||||
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
|
||||
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
|
||||
@ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude
|
||||
that a FLIC operation is unavailable based on the error code resulting from a
|
||||
usage attempt.
|
||||
|
||||
Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
|
||||
schid is specified.
|
||||
.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
|
||||
zero schid is specified.
|
114
Documentation/virt/kvm/devices/vcpu.rst
Normal file
114
Documentation/virt/kvm/devices/vcpu.rst
Normal file
@ -0,0 +1,114 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================
|
||||
Generic vcpu interface
|
||||
======================
|
||||
|
||||
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
||||
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
||||
|
||||
The groups and attributes per virtual cpu, if any, are architecture specific.
|
||||
|
||||
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
||||
==================================
|
||||
|
||||
:Architectures: ARM64
|
||||
|
||||
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
||||
---------------------------------------
|
||||
|
||||
:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
||||
pointer to an int
|
||||
|
||||
Returns:
|
||||
|
||||
======= ========================================================
|
||||
-EBUSY The PMU overflow interrupt is already set
|
||||
-ENXIO The overflow interrupt not set when attempting to get it
|
||||
-ENODEV PMUv3 not supported
|
||||
-EINVAL Invalid PMU overflow interrupt number supplied or
|
||||
trying to set the IRQ number without using an in-kernel
|
||||
irqchip.
|
||||
======= ========================================================
|
||||
|
||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||
|
||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||
---------------------------------------
|
||||
|
||||
:Parameters: no additional parameter in kvm_device_attr.addr
|
||||
|
||||
Returns:
|
||||
|
||||
======= ======================================================
|
||||
-ENODEV PMUv3 not supported or GIC not initialized
|
||||
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
|
||||
configured as required prior to calling this attribute
|
||||
-EBUSY PMUv3 already initialized
|
||||
======= ======================================================
|
||||
|
||||
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
||||
virtual GIC implementation, this must be done after initializing the in-kernel
|
||||
irqchip.
|
||||
|
||||
|
||||
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
||||
=================================
|
||||
|
||||
:Architectures: ARM, ARM64
|
||||
|
||||
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
||||
pointer to an int
|
||||
|
||||
Returns:
|
||||
|
||||
======= =================================
|
||||
-EINVAL Invalid timer interrupt number
|
||||
-EBUSY One or more VCPUs has already run
|
||||
======= =================================
|
||||
|
||||
A value describing the architected timer interrupt number when connected to an
|
||||
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
||||
attribute overrides the default values (see below).
|
||||
|
||||
============================= ==========================================
|
||||
KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
|
||||
KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
|
||||
============================= ==========================================
|
||||
|
||||
Setting the same PPI for different timers will prevent the VCPUs from running.
|
||||
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
||||
time to use the number provided for a given timer, overwriting any previously
|
||||
configured values on other VCPUs. Userspace should configure the interrupt
|
||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||
VCPUs.
|
||||
|
||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||
==================================
|
||||
|
||||
:Architectures: ARM64
|
||||
|
||||
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
||||
--------------------------------------
|
||||
|
||||
:Parameters: 64-bit base address
|
||||
|
||||
Returns:
|
||||
|
||||
======= ======================================
|
||||
-ENXIO Stolen time not implemented
|
||||
-EEXIST Base address already set for this VCPU
|
||||
-EINVAL Base address not 64 byte aligned
|
||||
======= ======================================
|
||||
|
||||
Specifies the base address of the stolen time structure for this VCPU. The
|
||||
base address must be 64 byte aligned and exist within a valid guest memory
|
||||
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
||||
including the layout of the stolen time structure.
|
@ -1,76 +0,0 @@
|
||||
Generic vcpu interface
|
||||
====================================
|
||||
|
||||
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
||||
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
||||
|
||||
The groups and attributes per virtual cpu, if any, are architecture specific.
|
||||
|
||||
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
||||
Architectures: ARM64
|
||||
|
||||
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
||||
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
||||
pointer to an int
|
||||
Returns: -EBUSY: The PMU overflow interrupt is already set
|
||||
-ENXIO: The overflow interrupt not set when attempting to get it
|
||||
-ENODEV: PMUv3 not supported
|
||||
-EINVAL: Invalid PMU overflow interrupt number supplied or
|
||||
trying to set the IRQ number without using an in-kernel
|
||||
irqchip.
|
||||
|
||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||
|
||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||
Parameters: no additional parameter in kvm_device_attr.addr
|
||||
Returns: -ENODEV: PMUv3 not supported or GIC not initialized
|
||||
-ENXIO: PMUv3 not properly configured or in-kernel irqchip not
|
||||
configured as required prior to calling this attribute
|
||||
-EBUSY: PMUv3 already initialized
|
||||
|
||||
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
||||
virtual GIC implementation, this must be done after initializing the in-kernel
|
||||
irqchip.
|
||||
|
||||
|
||||
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
||||
Architectures: ARM,ARM64
|
||||
|
||||
2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
|
||||
2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
||||
Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
||||
pointer to an int
|
||||
Returns: -EINVAL: Invalid timer interrupt number
|
||||
-EBUSY: One or more VCPUs has already run
|
||||
|
||||
A value describing the architected timer interrupt number when connected to an
|
||||
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
||||
attribute overrides the default values (see below).
|
||||
|
||||
KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
|
||||
KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
|
||||
|
||||
Setting the same PPI for different timers will prevent the VCPUs from running.
|
||||
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
||||
time to use the number provided for a given timer, overwriting any previously
|
||||
configured values on other VCPUs. Userspace should configure the interrupt
|
||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||
VCPUs.
|
||||
|
||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||
Architectures: ARM64
|
||||
|
||||
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
||||
Parameters: 64-bit base address
|
||||
Returns: -ENXIO: Stolen time not implemented
|
||||
-EEXIST: Base address already set for this VCPU
|
||||
-EINVAL: Base address not 64 byte aligned
|
||||
|
||||
Specifies the base address of the stolen time structure for this VCPU. The
|
||||
base address must be 64 byte aligned and exist within a valid guest memory
|
||||
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
||||
including the layout of the stolen time structure.
|
@ -1,8 +1,12 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================
|
||||
VFIO virtual device
|
||||
===================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_VFIO
|
||||
|
||||
- KVM_DEV_TYPE_VFIO
|
||||
|
||||
Only one VFIO instance may be created per VM. The created device
|
||||
tracks VFIO groups in use by the VM and features of those groups
|
||||
@ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes:
|
||||
for the VFIO group.
|
||||
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
|
||||
allocated by sPAPR KVM.
|
||||
kvm_device_attr.addr points to a struct:
|
||||
kvm_device_attr.addr points to a struct::
|
||||
|
||||
struct kvm_vfio_spapr_tce {
|
||||
__s32 groupfd;
|
||||
__s32 tablefd;
|
||||
};
|
||||
struct kvm_vfio_spapr_tce {
|
||||
__s32 groupfd;
|
||||
__s32 tablefd;
|
||||
};
|
||||
|
||||
where
|
||||
@groupfd is a file descriptor for a VFIO group;
|
||||
@tablefd is a file descriptor for a TCE table allocated via
|
||||
KVM_CREATE_SPAPR_TCE.
|
||||
where:
|
||||
|
||||
- @groupfd is a file descriptor for a VFIO group;
|
||||
- @tablefd is a file descriptor for a TCE table allocated via
|
||||
KVM_CREATE_SPAPR_TCE.
|
@ -1,5 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Generic vm interface
|
||||
====================================
|
||||
====================
|
||||
|
||||
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
|
||||
@ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture
|
||||
specific.
|
||||
|
||||
1. GROUP: KVM_S390_VM_MEM_CTRL
|
||||
Architectures: s390
|
||||
==============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
|
||||
Parameters: none
|
||||
Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
||||
-------------------------------------------
|
||||
|
||||
:Parameters: none
|
||||
:Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
||||
|
||||
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
|
||||
|
||||
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
|
||||
Parameters: none
|
||||
Returns: -EINVAL if CMMA was not enabled
|
||||
0 otherwise
|
||||
----------------------------------------
|
||||
|
||||
:Parameters: none
|
||||
:Returns: -EINVAL if CMMA was not enabled;
|
||||
0 otherwise
|
||||
|
||||
Clear the CMMA status for all guest pages, so any pages the guest marked
|
||||
as unused are again used any may not be reclaimed by the host.
|
||||
|
||||
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
|
||||
Parameters: in attr->addr the address for the new limit of guest memory
|
||||
Returns: -EFAULT if the given address is not accessible
|
||||
-EINVAL if the virtual machine is of type UCONTROL
|
||||
-E2BIG if the given guest memory is to big for that machine
|
||||
-EBUSY if a vcpu is already defined
|
||||
-ENOMEM if not enough memory is available for a new shadow guest mapping
|
||||
0 otherwise
|
||||
-----------------------------------------
|
||||
|
||||
:Parameters: in attr->addr the address for the new limit of guest memory
|
||||
:Returns: -EFAULT if the given address is not accessible;
|
||||
-EINVAL if the virtual machine is of type UCONTROL;
|
||||
-E2BIG if the given guest memory is to big for that machine;
|
||||
-EBUSY if a vcpu is already defined;
|
||||
-ENOMEM if not enough memory is available for a new shadow guest mapping;
|
||||
0 otherwise.
|
||||
|
||||
Allows userspace to query the actual limit and set a new limit for
|
||||
the maximum guest memory size. The limit will be rounded up to
|
||||
@ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set
|
||||
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
|
||||
|
||||
2. GROUP: KVM_S390_VM_CPU_MODEL
|
||||
Architectures: s390
|
||||
===============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
|
||||
---------------------------------------------
|
||||
|
||||
Allows user space to retrieve machine and kvm specific cpu related information:
|
||||
Allows user space to retrieve machine and kvm specific cpu related information::
|
||||
|
||||
struct kvm_s390_vm_cpu_machine {
|
||||
struct kvm_s390_vm_cpu_machine {
|
||||
__u64 cpuid; # CPUID of host
|
||||
__u32 ibc; # IBC level range offered by host
|
||||
__u8 pad[4];
|
||||
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
|
||||
__u64 fac_list[256]; # set of cpu facilities offered by host
|
||||
}
|
||||
}
|
||||
|
||||
Parameters: address of buffer to store the machine related cpu data
|
||||
of type struct kvm_s390_vm_cpu_machine*
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
-ENOMEM if not enough memory is available to process the ioctl
|
||||
0 in case of success
|
||||
:Parameters: address of buffer to store the machine related cpu data
|
||||
of type struct kvm_s390_vm_cpu_machine*
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-ENOMEM if not enough memory is available to process the ioctl;
|
||||
0 in case of success.
|
||||
|
||||
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
|
||||
===============================================
|
||||
|
||||
Allows user space to retrieve or request to change cpu related information for a vcpu:
|
||||
Allows user space to retrieve or request to change cpu related information for a vcpu::
|
||||
|
||||
struct kvm_s390_vm_cpu_processor {
|
||||
struct kvm_s390_vm_cpu_processor {
|
||||
__u64 cpuid; # CPUID currently (to be) used by this vcpu
|
||||
__u16 ibc; # IBC level currently (to be) used by this vcpu
|
||||
__u8 pad[6];
|
||||
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
|
||||
# by this vcpu
|
||||
}
|
||||
# by this vcpu
|
||||
}
|
||||
|
||||
KVM does not enforce or limit the cpu model data in any form. Take the information
|
||||
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
|
||||
setups. Instruction interceptions triggered by additionally set facility bits that
|
||||
are not handled by KVM need to by imlemented in the VM driver code.
|
||||
|
||||
Parameters: address of buffer to store/set the processor related cpu
|
||||
data of type struct kvm_s390_vm_cpu_processor*.
|
||||
Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
|
||||
-EFAULT if the given address is not accessible from kernel space
|
||||
-ENOMEM if not enough memory is available to process the ioctl
|
||||
0 in case of success
|
||||
:Parameters: address of buffer to store/set the processor related cpu
|
||||
data of type struct kvm_s390_vm_cpu_processor*.
|
||||
:Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
|
||||
-EFAULT if the given address is not accessible from kernel space;
|
||||
-ENOMEM if not enough memory is available to process the ioctl;
|
||||
0 in case of success.
|
||||
|
||||
.. _KVM_S390_VM_CPU_MACHINE_FEAT:
|
||||
|
||||
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
|
||||
--------------------------------------------------
|
||||
|
||||
Allows user space to retrieve available cpu features. A feature is available if
|
||||
provided by the hardware and supported by kvm. In theory, cpu features could
|
||||
even be completely emulated by kvm.
|
||||
|
||||
struct kvm_s390_vm_cpu_feat {
|
||||
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
||||
};
|
||||
::
|
||||
|
||||
Parameters: address of a buffer to load the feature list from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
0 in case of success.
|
||||
struct kvm_s390_vm_cpu_feat {
|
||||
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
||||
};
|
||||
|
||||
:Parameters: address of a buffer to load the feature list from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
||||
|
||||
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
|
||||
----------------------------------------------------
|
||||
|
||||
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
|
||||
VM. Features that are not available cannot be enabled.
|
||||
|
||||
See 2.3. for a description of the parameter struct.
|
||||
See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
|
||||
a description of the parameter struct.
|
||||
|
||||
Parameters: address of a buffer to store/load the feature list from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
-EINVAL if a cpu feature that is not available is to be enabled.
|
||||
-EBUSY if at least one VCPU has already been defined.
|
||||
:Parameters: address of a buffer to store/load the feature list from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if a cpu feature that is not available is to be enabled;
|
||||
-EBUSY if at least one VCPU has already been defined;
|
||||
0 in case of success.
|
||||
|
||||
.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
||||
|
||||
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
|
||||
-----------------------------------------------------
|
||||
|
||||
Allows user space to retrieve available cpu subfunctions without any filtering
|
||||
done by a set IBC. These subfunctions are indicated to the guest VCPU via
|
||||
@ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction
|
||||
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
|
||||
contained in the returned struct in MSB 0 bit numbering.
|
||||
|
||||
struct kvm_s390_vm_cpu_subfunc {
|
||||
::
|
||||
|
||||
struct kvm_s390_vm_cpu_subfunc {
|
||||
u8 plo[32]; # always valid (ESA/390 feature)
|
||||
u8 ptff[16]; # valid with TOD-clock steering
|
||||
u8 kmac[16]; # valid with Message-Security-Assist
|
||||
@ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
|
||||
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
|
||||
u8 reserved[1792]; # reserved for future instructions
|
||||
};
|
||||
};
|
||||
|
||||
Parameters: address of a buffer to load the subfunction blocks from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
:Parameters: address of a buffer to load the subfunction blocks from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
||||
|
||||
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to retrieve or change cpu subfunctions to be indicated for
|
||||
all VCPUs of a VM. This attribute will only be available if kernel and
|
||||
@ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used
|
||||
to determine available subfunctions in this case, this will guarantee backward
|
||||
compatibility.
|
||||
|
||||
See 2.5. for a description of the parameter struct.
|
||||
See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
|
||||
description of the parameter struct.
|
||||
|
||||
Parameters: address of a buffer to store/load the subfunction blocks from.
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
||||
-EINVAL when reading, if there was no write yet.
|
||||
-EBUSY if at least one VCPU has already been defined.
|
||||
:Parameters: address of a buffer to store/load the subfunction blocks from.
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL when reading, if there was no write yet;
|
||||
-EBUSY if at least one VCPU has already been defined;
|
||||
0 in case of success.
|
||||
|
||||
3. GROUP: KVM_S390_VM_TOD
|
||||
Architectures: s390
|
||||
=========================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
|
||||
------------------------------------
|
||||
|
||||
Allows user space to set/get the TOD clock extension (u8) (superseded by
|
||||
KVM_S390_VM_TOD_EXT).
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u8) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u8) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||
|
||||
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
|
||||
-----------------------------------
|
||||
|
||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||
the POP (u64).
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u64) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u64) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
|
||||
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
|
||||
-----------------------------------
|
||||
|
||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
|
||||
also allows user space to get/set it. If the guest CPU model does not support
|
||||
it, it is stored as 0 and not allowed to be set to a value != 0.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data
|
||||
(kvm_s390_vm_tod_clock) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data
|
||||
(kvm_s390_vm_tod_clock) to
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||
|
||||
4. GROUP: KVM_S390_VM_CRYPTO
|
||||
Architectures: s390
|
||||
============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
|
||||
------------------------------------------------------
|
||||
|
||||
Allows user space to enable aes key wrapping, including generating a new
|
||||
wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
|
||||
------------------------------------------------------
|
||||
|
||||
Allows user space to enable dea key wrapping, including generating a new
|
||||
wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to disable aes key wrapping, clearing the wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
|
||||
-------------------------------------------------------
|
||||
|
||||
Allows user space to disable dea key wrapping, clearing the wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
5. GROUP: KVM_S390_VM_MIGRATION
|
||||
Architectures: s390
|
||||
===============================
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
|
||||
------------------------------------------------
|
||||
|
||||
Allows userspace to stop migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is not active will have no
|
||||
effects.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
:Parameters: none
|
||||
:Returns: 0
|
||||
|
||||
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
|
||||
-------------------------------------------------
|
||||
|
||||
Allows userspace to start migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is already active will have
|
||||
no effects.
|
||||
|
||||
Parameters: none
|
||||
Returns: -ENOMEM if there is not enough free memory to start migration mode
|
||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined)
|
||||
:Parameters: none
|
||||
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
|
||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
|
||||
0 in case of success.
|
||||
|
||||
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
|
||||
--------------------------------------------------
|
||||
|
||||
Allows userspace to query the status of migration mode.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u64) to;
|
||||
the data itself is either 0 if migration mode is disabled or 1
|
||||
if it is enabled
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
:Parameters: address of a buffer in user space to store the data (u64) to;
|
||||
the data itself is either 0 if migration mode is disabled or 1
|
||||
if it is enabled
|
||||
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||
0 in case of success.
|
@ -1,20 +1,31 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================
|
||||
XICS interrupt controller
|
||||
=========================
|
||||
|
||||
Device type supported: KVM_DEV_TYPE_XICS
|
||||
|
||||
Groups:
|
||||
1. KVM_DEV_XICS_GRP_SOURCES
|
||||
Attributes: One per interrupt source, indexed by the source number.
|
||||
Attributes:
|
||||
|
||||
One per interrupt source, indexed by the source number.
|
||||
2. KVM_DEV_XICS_GRP_CTRL
|
||||
Attributes:
|
||||
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
||||
Attributes:
|
||||
|
||||
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
||||
|
||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||
interrupt server numbers (ie, highest possible vcpu id plus one).
|
||||
|
||||
Errors:
|
||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EBUSY: A vcpu is already connected to the device.
|
||||
|
||||
======= ==========================================
|
||||
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EBUSY A vcpu is already connected to the device.
|
||||
======= ==========================================
|
||||
|
||||
This device emulates the XICS (eXternal Interrupt Controller
|
||||
Specification) defined in PAPR. The XICS has a set of interrupt
|
||||
@ -53,24 +64,29 @@ the interrupt source number. The 64 bit state word has the following
|
||||
bitfields, starting from the least-significant end of the word:
|
||||
|
||||
* Destination (server number), 32 bits
|
||||
|
||||
This specifies where the interrupt should be sent, and is the
|
||||
interrupt server number specified for the destination vcpu.
|
||||
|
||||
* Priority, 8 bits
|
||||
|
||||
This is the priority specified for this interrupt source, where 0 is
|
||||
the highest priority and 255 is the lowest. An interrupt with a
|
||||
priority of 255 will never be delivered.
|
||||
|
||||
* Level sensitive flag, 1 bit
|
||||
|
||||
This bit is 1 for a level-sensitive interrupt source, or 0 for
|
||||
edge-sensitive (or MSI).
|
||||
|
||||
* Masked flag, 1 bit
|
||||
|
||||
This bit is set to 1 if the interrupt is masked (cannot be delivered
|
||||
regardless of its priority), for example by the ibm,int-off RTAS
|
||||
call, or 0 if it is not masked.
|
||||
|
||||
* Pending flag, 1 bit
|
||||
|
||||
This bit is 1 if the source has a pending interrupt, otherwise 0.
|
||||
|
||||
Only one XICS instance may be created per VM.
|
@ -1,8 +1,11 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================================
|
||||
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
|
||||
==========================================================
|
||||
===========================================================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||
- KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||
|
||||
This device acts as a VM interrupt controller. It provides the KVM
|
||||
interface to configure the interrupt sources of a VM in the underlying
|
||||
@ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
|
||||
* Groups:
|
||||
|
||||
1. KVM_DEV_XIVE_GRP_CTRL
|
||||
Provides global controls on the device
|
||||
1. KVM_DEV_XIVE_GRP_CTRL
|
||||
Provides global controls on the device
|
||||
|
||||
Attributes:
|
||||
1.1 KVM_DEV_XIVE_RESET (write only)
|
||||
Resets the interrupt controller configuration for sources and event
|
||||
queues. To be used by kexec and kdump.
|
||||
|
||||
Errors: none
|
||||
|
||||
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
|
||||
Sync all the sources and queues and mark the EQ pages dirty. This
|
||||
to make sure that a consistent memory state is captured when
|
||||
migrating the VM.
|
||||
|
||||
Errors: none
|
||||
|
||||
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
|
||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||
interrupt server numbers (ie, highest possible vcpu id plus one).
|
||||
Errors:
|
||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EBUSY: A vCPU is already connected to the device.
|
||||
|
||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||
Initializes a new source in the XIVE device and mask it.
|
||||
Errors:
|
||||
|
||||
======= ==========================================
|
||||
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EBUSY A vCPU is already connected to the device.
|
||||
======= ==========================================
|
||||
|
||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||
Initializes a new source in the XIVE device and mask it.
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 2 | 1 | 0
|
||||
values: | unused | level | type
|
||||
|
||||
The kvm_device_attr.addr points to a __u64 value::
|
||||
|
||||
bits: | 63 .... 2 | 1 | 0
|
||||
values: | unused | level | type
|
||||
|
||||
- type: 0:MSI 1:LSI
|
||||
- level: assertion level in case of an LSI.
|
||||
Errors:
|
||||
-E2BIG: Interrupt source number is out of range
|
||||
-ENOMEM: Could not create a new source block
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: Could not allocate underlying HW interrupt
|
||||
|
||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||
Configures source targeting
|
||||
Errors:
|
||||
|
||||
======= ==========================================
|
||||
-E2BIG Interrupt source number is out of range
|
||||
-ENOMEM Could not create a new source block
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENXIO Could not allocate underlying HW interrupt
|
||||
======= ==========================================
|
||||
|
||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||
Configures source targeting
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||
values: | eisn | mask | server | priority
|
||||
|
||||
The kvm_device_attr.addr points to a __u64 value::
|
||||
|
||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||
values: | eisn | mask | server | priority
|
||||
|
||||
- priority: 0-7 interrupt priority level
|
||||
- server: CPU number chosen to handle the interrupt
|
||||
- mask: mask flag (unused)
|
||||
- eisn: Effective Interrupt Source Number
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid CPU number.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: CPU event queues not configured or configuration of the
|
||||
underlying HW interrupt failed
|
||||
-EBUSY: No CPU available to serve interrupt
|
||||
|
||||
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||
Configures an event queue of a CPU
|
||||
Errors:
|
||||
|
||||
======= =======================================================
|
||||
-ENOENT Unknown source number
|
||||
-EINVAL Not initialized source number
|
||||
-EINVAL Invalid priority
|
||||
-EINVAL Invalid CPU number.
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-ENXIO CPU event queues not configured or configuration of the
|
||||
underlying HW interrupt failed
|
||||
-EBUSY No CPU available to serve interrupt
|
||||
======= =======================================================
|
||||
|
||||
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||
Configures an event queue of a CPU
|
||||
|
||||
Attributes:
|
||||
EQ descriptor identifier (64-bit)
|
||||
The EQ descriptor identifier is a tuple (server, priority) :
|
||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||
values: | unused | server | priority
|
||||
The kvm_device_attr.addr points to :
|
||||
|
||||
The EQ descriptor identifier is a tuple (server, priority)::
|
||||
|
||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||
values: | unused | server | priority
|
||||
|
||||
The kvm_device_attr.addr points to::
|
||||
|
||||
struct kvm_ppc_xive_eq {
|
||||
__u32 flags;
|
||||
__u32 qshift;
|
||||
@ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
__u32 qindex;
|
||||
__u8 pad[40];
|
||||
};
|
||||
|
||||
- flags: queue flags
|
||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||
forces notification without using the coalescing mechanism
|
||||
provided by the XIVE END ESBs.
|
||||
- qshift: queue size (power of 2)
|
||||
@ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
- qtoggle: current queue toggle bit
|
||||
- qindex: current queue index
|
||||
- pad: reserved for future use
|
||||
Errors:
|
||||
-ENOENT: Invalid CPU number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid flags
|
||||
-EINVAL: Invalid queue size
|
||||
-EINVAL: Invalid queue address
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EIO: Configuration of the underlying HW failed
|
||||
|
||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||
Synchronize the source to flush event notifications
|
||||
Errors:
|
||||
|
||||
======= =========================================
|
||||
-ENOENT Invalid CPU number
|
||||
-EINVAL Invalid priority
|
||||
-EINVAL Invalid flags
|
||||
-EINVAL Invalid queue size
|
||||
-EINVAL Invalid queue address
|
||||
-EFAULT Invalid user pointer for attr->addr.
|
||||
-EIO Configuration of the underlying HW failed
|
||||
======= =========================================
|
||||
|
||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||
Synchronize the source to flush event notifications
|
||||
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
|
||||
======= =============================
|
||||
-ENOENT Unknown source number
|
||||
-EINVAL Not initialized source number
|
||||
======= =============================
|
||||
|
||||
* VCPU state
|
||||
|
||||
@ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
as it synthesizes the priorities of the pending interrupts. We
|
||||
capture a bit more to report debug information.
|
||||
|
||||
KVM_REG_PPC_VP_STATE (2 * 64bits)
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | TIMA word0 | TIMA word1 |
|
||||
bits: | 127 .......... 64 |
|
||||
values: | unused |
|
||||
KVM_REG_PPC_VP_STATE (2 * 64bits)::
|
||||
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | TIMA word0 | TIMA word1 |
|
||||
bits: | 127 .......... 64 |
|
||||
values: | unused |
|
||||
|
||||
* Migration:
|
||||
|
||||
@ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
||||
3. Capture the state of the source targeting, the EQs configuration
|
||||
and the state of thread interrupt context registers.
|
||||
|
||||
Restore is similar :
|
||||
Restore is similar:
|
||||
|
||||
1. Restore the EQ configuration. As targeting depends on it.
|
||||
2. Restore targeting
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================
|
||||
The KVM halt polling system
|
||||
===========================
|
||||
|
||||
@ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups
|
||||
which come at an approximately constant rate, otherwise there will be constant
|
||||
adjustment of the polling interval.
|
||||
|
||||
[0] total block time: the time between when the halt polling function is
|
||||
[0] total block time:
|
||||
the time between when the halt polling function is
|
||||
invoked and a wakeup source received (irrespective of
|
||||
whether the scheduler is invoked within that function).
|
||||
|
||||
@ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
|
||||
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
|
||||
powerpc kvm-hv case.
|
||||
|
||||
Module Parameter | Description | Default Value
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT
|
||||
| interval which defines |
|
||||
| the ceiling value of the |
|
||||
| polling interval for | (per arch value)
|
||||
| each vcpu. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_grow | The value by which the | 2
|
||||
| halt polling interval is |
|
||||
| multiplied in the |
|
||||
| grow_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_grow_start | The initial value to grow | 10000
|
||||
| to from zero in the |
|
||||
| grow_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
halt_poll_ns_shrink | The value by which the | 0
|
||||
| halt polling interval is |
|
||||
| divided in the |
|
||||
| shrink_halt_poll_ns() |
|
||||
| function. |
|
||||
--------------------------------------------------------------------------------
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|Module Parameter | Description | Default Value |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
|
||||
| | interval which defines | |
|
||||
| | the ceiling value of the | |
|
||||
| | polling interval for | (per arch value) |
|
||||
| | each vcpu. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_grow | The value by which the | 2 |
|
||||
| | halt polling interval is | |
|
||||
| | multiplied in the | |
|
||||
| | grow_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_grow_start| The initial value to grow | 10000 |
|
||||
| | to from zero in the | |
|
||||
| | grow_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|halt_poll_ns_shrink | The value by which the | 0 |
|
||||
| | halt polling interval is | |
|
||||
| | divided in the | |
|
||||
| | shrink_halt_poll_ns() | |
|
||||
| | function. | |
|
||||
+-----------------------+---------------------------+-------------------------+
|
||||
|
||||
These module parameters can be set from the debugfs files in:
|
||||
|
||||
@ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to
|
||||
Further Notes
|
||||
=============
|
||||
|
||||
- Care should be taken when setting the halt_poll_ns module parameter as a
|
||||
large value has the potential to drive the cpu usage to 100% on a machine which
|
||||
would be almost entirely idle otherwise. This is because even if a guest has
|
||||
wakeups during which very little work is done and which are quite far apart, if
|
||||
the period is shorter than the global max polling interval (halt_poll_ns) then
|
||||
the host will always poll for the entire block time and thus cpu utilisation
|
||||
will go to 100%.
|
||||
- Care should be taken when setting the halt_poll_ns module parameter as a large value
|
||||
has the potential to drive the cpu usage to 100% on a machine which would be almost
|
||||
entirely idle otherwise. This is because even if a guest has wakeups during which very
|
||||
little work is done and which are quite far apart, if the period is shorter than the
|
||||
global max polling interval (halt_poll_ns) then the host will always poll for the
|
||||
entire block time and thus cpu utilisation will go to 100%.
|
||||
|
||||
- Halt polling essentially presents a trade off between power usage and latency
|
||||
and the module parameters should be used to tune the affinity for this. Idle
|
||||
cpu time is essentially converted to host kernel time with the aim of decreasing
|
||||
latency when entering the guest.
|
||||
- Halt polling essentially presents a trade off between power usage and latency and
|
||||
the module parameters should be used to tune the affinity for this. Idle cpu time is
|
||||
essentially converted to host kernel time with the aim of decreasing latency when
|
||||
entering the guest.
|
||||
|
||||
- Halt polling will only be conducted by the host when no other tasks are
|
||||
runnable on that cpu, otherwise the polling will cease immediately and
|
||||
schedule will be invoked to allow that other task to run. Thus this doesn't
|
||||
allow a guest to denial of service the cpu.
|
||||
- Halt polling will only be conducted by the host when no other tasks are runnable on
|
||||
that cpu, otherwise the polling will cease immediately and schedule will be invoked to
|
||||
allow that other task to run. Thus this doesn't allow a guest to denial of service the
|
||||
cpu.
|
@ -1,5 +1,9 @@
|
||||
Linux KVM Hypercall:
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================
|
||||
Linux KVM Hypercall
|
||||
===================
|
||||
|
||||
X86:
|
||||
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
|
||||
instruction. The hypervisor can replace it with instructions that are
|
||||
@ -20,7 +24,7 @@ S390:
|
||||
For further information on the S390 diagnose call as supported by KVM,
|
||||
refer to Documentation/virt/kvm/s390-diag.txt.
|
||||
|
||||
PowerPC:
|
||||
PowerPC:
|
||||
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
|
||||
Return value is placed in R3.
|
||||
|
||||
@ -34,7 +38,8 @@ MIPS:
|
||||
the return value is placed in $2 (v0).
|
||||
|
||||
KVM Hypercalls Documentation
|
||||
===========================
|
||||
============================
|
||||
|
||||
The template for each hypercall is:
|
||||
1. Hypercall name.
|
||||
2. Architecture(s)
|
||||
@ -43,56 +48,64 @@ The template for each hypercall is:
|
||||
|
||||
1. KVM_HC_VAPIC_POLL_IRQ
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Trigger guest exit so that the host can check for pending
|
||||
interrupts on reentry.
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Trigger guest exit so that the host can check for pending
|
||||
interrupts on reentry.
|
||||
|
||||
2. KVM_HC_MMU_OP
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: deprecated.
|
||||
Purpose: Support MMU operations such as writing to PTE,
|
||||
flushing TLB, release PT.
|
||||
----------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: deprecated.
|
||||
:Purpose: Support MMU operations such as writing to PTE,
|
||||
flushing TLB, release PT.
|
||||
|
||||
3. KVM_HC_FEATURES
|
||||
------------------------
|
||||
Architecture: PPC
|
||||
Status: active
|
||||
Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
||||
used to enumerate which hypercalls are available. On PPC, either device tree
|
||||
based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
|
||||
mechanism (which is this hypercall) can be used.
|
||||
------------------
|
||||
|
||||
:Architecture: PPC
|
||||
:Status: active
|
||||
:Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
||||
used to enumerate which hypercalls are available. On PPC, either
|
||||
device tree based lookup ( which is also what EPAPR dictates)
|
||||
OR KVM specific enumeration mechanism (which is this hypercall)
|
||||
can be used.
|
||||
|
||||
4. KVM_HC_PPC_MAP_MAGIC_PAGE
|
||||
------------------------
|
||||
Architecture: PPC
|
||||
Status: active
|
||||
Purpose: To enable communication between the hypervisor and guest there is a
|
||||
shared page that contains parts of supervisor visible register state.
|
||||
The guest can map this shared page to access its supervisor register through
|
||||
memory using this hypercall.
|
||||
----------------------------
|
||||
|
||||
:Architecture: PPC
|
||||
:Status: active
|
||||
:Purpose: To enable communication between the hypervisor and guest there is a
|
||||
shared page that contains parts of supervisor visible register state.
|
||||
The guest can map this shared page to access its supervisor register
|
||||
through memory using this hypercall.
|
||||
|
||||
5. KVM_HC_KICK_CPU
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to wakeup a vcpu from HLT state
|
||||
Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
|
||||
kernel mode for an event to occur (ex: a spinlock to become available) can
|
||||
execute HLT instruction once it has busy-waited for more than a threshold
|
||||
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
||||
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
||||
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
||||
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
||||
is used in the hypercall for future use.
|
||||
------------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to wakeup a vcpu from HLT state
|
||||
:Usage example:
|
||||
A vcpu of a paravirtualized guest that is busywaiting in guest
|
||||
kernel mode for an event to occur (ex: a spinlock to become available) can
|
||||
execute HLT instruction once it has busy-waited for more than a threshold
|
||||
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
||||
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
||||
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
||||
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
||||
is used in the hypercall for future use.
|
||||
|
||||
|
||||
6. KVM_HC_CLOCK_PAIRING
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to synchronize host and guest clocks.
|
||||
-----------------------
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to synchronize host and guest clocks.
|
||||
|
||||
Usage:
|
||||
|
||||
a0: guest physical address where host copies
|
||||
@ -101,6 +114,8 @@ a0: guest physical address where host copies
|
||||
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
|
||||
is supported (corresponding to the host's CLOCK_REALTIME clock).
|
||||
|
||||
::
|
||||
|
||||
struct kvm_clock_pairing {
|
||||
__s64 sec;
|
||||
__s64 nsec;
|
||||
@ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
|
||||
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
|
||||
|
||||
6. KVM_HC_SEND_IPI
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Send IPIs to multiple vCPUs.
|
||||
------------------
|
||||
|
||||
a0: lower part of the bitmap of destination APIC IDs
|
||||
a1: higher part of the bitmap of destination APIC IDs
|
||||
a2: the lowest APIC ID in bitmap
|
||||
a3: APIC ICR
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Send IPIs to multiple vCPUs.
|
||||
|
||||
- a0: lower part of the bitmap of destination APIC IDs
|
||||
- a1: higher part of the bitmap of destination APIC IDs
|
||||
- a2: the lowest APIC ID in bitmap
|
||||
- a3: APIC ICR
|
||||
|
||||
The hypercall lets a guest send multicast IPIs, with at most 128
|
||||
128 destinations per hypercall in 64-bit mode and 64 vCPUs per
|
||||
@ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on.
|
||||
Returns the number of CPUs to which the IPIs were delivered successfully.
|
||||
|
||||
7. KVM_HC_SCHED_YIELD
|
||||
------------------------
|
||||
Architecture: x86
|
||||
Status: active
|
||||
Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
||||
---------------------
|
||||
|
||||
:Architecture: x86
|
||||
:Status: active
|
||||
:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
||||
|
||||
a0: destination APIC ID
|
||||
|
||||
Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||
any of the IPI target vCPUs was preempted.
|
||||
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||
any of the IPI target vCPUs was preempted.
|
@ -7,6 +7,22 @@ KVM
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api
|
||||
amd-memory-encryption
|
||||
cpuid
|
||||
halt-polling
|
||||
hypercalls
|
||||
locking
|
||||
mmu
|
||||
msr
|
||||
nested-vmx
|
||||
ppc-pv
|
||||
s390-diag
|
||||
timekeeping
|
||||
vcpu-requests
|
||||
|
||||
review-checklist
|
||||
|
||||
arm/index
|
||||
|
||||
devices/index
|
||||
|
243
Documentation/virt/kvm/locking.rst
Normal file
243
Documentation/virt/kvm/locking.rst
Normal file
@ -0,0 +1,243 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
KVM Lock Overview
|
||||
=================
|
||||
|
||||
1. Acquisition Orders
|
||||
---------------------
|
||||
|
||||
The acquisition orders for mutexes are as follows:
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
||||
2. Exception
|
||||
------------
|
||||
|
||||
Fast page fault:
|
||||
|
||||
Fast page fault is the fast path which fixes the guest page fault out of
|
||||
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
||||
following two cases:
|
||||
|
||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||
restore the saved R/X bits. This is described in more detail later below.
|
||||
|
||||
2. Write-Protection: The SPTE is present and the fault is
|
||||
caused by write-protect. That means we just need to change the W bit of
|
||||
the spte.
|
||||
|
||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||
SPTE_MMU_WRITEABLE bit on the spte:
|
||||
|
||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||
page write-protection.
|
||||
|
||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||
is safe because whenever changing these bits can be detected by cmpxchg.
|
||||
|
||||
But we need carefully check these cases:
|
||||
|
||||
1) The mapping from gfn to pfn
|
||||
|
||||
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
||||
is not changed during cmpxchg. This is a ABA problem, for example, below case
|
||||
will happen:
|
||||
|
||||
+------------------------------------------------------------------------+
|
||||
| At the beginning:: |
|
||||
| |
|
||||
| gpte = gfn1 |
|
||||
| gfn1 is mapped to pfn1 on host |
|
||||
| spte is the shadow page table entry corresponding with gpte and |
|
||||
| spte = pfn1 |
|
||||
+------------------------------------------------------------------------+
|
||||
| On fast page fault path: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| CPU 0: | CPU 1: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: | |
|
||||
| | |
|
||||
| old_spte = *spte; | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| | pfn1 is swapped out:: |
|
||||
| | |
|
||||
| | spte = 0; |
|
||||
| | |
|
||||
| | pfn1 is re-alloced for gfn2. |
|
||||
| | |
|
||||
| | gpte is changed to point to |
|
||||
| | gfn2 by the guest:: |
|
||||
| | |
|
||||
| | spte = pfn1; |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: |
|
||||
| |
|
||||
| if (cmpxchg(spte, old_spte, old_spte+W) |
|
||||
| mark_page_dirty(vcpu->kvm, gfn1) |
|
||||
| OOPS!!! |
|
||||
+------------------------------------------------------------------------+
|
||||
|
||||
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
||||
|
||||
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
||||
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
||||
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
||||
|
||||
- We have held the refcount of pfn that means the pfn can not be freed and
|
||||
be reused for another gfn.
|
||||
- The pfn is writable that means it can not be shared between different gfns
|
||||
by KSM.
|
||||
|
||||
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
|
||||
|
||||
Currently, to simplify the whole things, we disable fast page fault for
|
||||
indirect shadow page.
|
||||
|
||||
2) Dirty bit tracking
|
||||
|
||||
In the origin code, the spte can be fast updated (non-atomically) if the
|
||||
spte is read-only and the Accessed bit has already been set since the
|
||||
Accessed bit and Dirty bit can not be lost.
|
||||
|
||||
But it is not true after fast page fault since the spte can be marked
|
||||
writable between reading spte and updating spte. Like below case:
|
||||
|
||||
+------------------------------------------------------------------------+
|
||||
| At the beginning:: |
|
||||
| |
|
||||
| spte.W = 0 |
|
||||
| spte.Accessed = 1 |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| CPU 0: | CPU 1: |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| In mmu_spte_clear_track_bits():: | |
|
||||
| | |
|
||||
| old_spte = *spte; | |
|
||||
| | |
|
||||
| | |
|
||||
| /* 'if' condition is satisfied. */| |
|
||||
| if (old_spte.Accessed == 1 && | |
|
||||
| old_spte.W == 0) | |
|
||||
| spte = 0ull; | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| | on fast page fault path:: |
|
||||
| | |
|
||||
| | spte.W = 1 |
|
||||
| | |
|
||||
| | memory write on the spte:: |
|
||||
| | |
|
||||
| | spte.Dirty = 1 |
|
||||
+------------------------------------+-----------------------------------+
|
||||
| :: | |
|
||||
| | |
|
||||
| else | |
|
||||
| old_spte = xchg(spte, 0ull) | |
|
||||
| if (old_spte.Accessed == 1) | |
|
||||
| kvm_set_pfn_accessed(spte.pfn);| |
|
||||
| if (old_spte.Dirty == 1) | |
|
||||
| kvm_set_pfn_dirty(spte.pfn); | |
|
||||
| OOPS!!! | |
|
||||
+------------------------------------+-----------------------------------+
|
||||
|
||||
The Dirty bit is lost in this case.
|
||||
|
||||
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
||||
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
|
||||
the spte is always atomically updated in this case.
|
||||
|
||||
3) flush tlbs due to spte updated
|
||||
|
||||
If the spte is updated from writable to readonly, we should flush all TLBs,
|
||||
otherwise rmap_write_protect will find a read-only spte, even though the
|
||||
writable spte might be cached on a CPU's TLB.
|
||||
|
||||
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
||||
fast page fault path, in order to easily audit the path, we see if TLBs need
|
||||
be flushed caused by this reason in mmu_spte_update() since this is a common
|
||||
function to update spte (present -> present).
|
||||
|
||||
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
||||
atomically update the spte, the race caused by fast page fault can be avoided,
|
||||
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
||||
|
||||
Lockless Access Tracking:
|
||||
|
||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||
a fault is generated and the fast page fault mechanism described above is used
|
||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||
the PTE is marked for access tracking and during restoration to the Present
|
||||
state, the W bit is set depending on whether or not it was a write access. If
|
||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||
which time it will be set using the Dirty tracking mechanism described above.
|
||||
|
||||
3. Reference
|
||||
------------
|
||||
|
||||
:Name: kvm_lock
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - vm_list
|
||||
|
||||
:Name: kvm_count_lock
|
||||
:Type: raw_spinlock_t
|
||||
:Arch: any
|
||||
:Protects: - hardware virtualization enable/disable
|
||||
:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
||||
migration.
|
||||
|
||||
:Name: kvm_arch::tsc_write_lock
|
||||
:Type: raw_spinlock
|
||||
:Arch: x86
|
||||
:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
||||
- tsc offset in vmcb
|
||||
:Comment: 'raw' because updating the tsc offsets must not be preempted.
|
||||
|
||||
:Name: kvm->mmu_lock
|
||||
:Type: spinlock_t
|
||||
:Arch: any
|
||||
:Protects: -shadow page/shadow tlb entry
|
||||
:Comment: it is a spinlock since it is used in mmu notifier.
|
||||
|
||||
:Name: kvm->srcu
|
||||
:Type: srcu lock
|
||||
:Arch: any
|
||||
:Protects: - kvm->memslots
|
||||
- kvm->buses
|
||||
:Comment: The srcu read lock must be held while accessing memslots (e.g.
|
||||
when using gfn_to_* functions) and while accessing in-kernel
|
||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||
if it is needed by multiple functions.
|
||||
|
||||
:Name: blocked_vcpu_on_cpu_lock
|
||||
:Type: spinlock_t
|
||||
:Arch: x86
|
||||
:Protects: blocked_vcpu_on_cpu
|
||||
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||
When VT-d posted-interrupts is supported and the VM has assigned
|
||||
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
||||
wakeup notification event since external interrupts from the
|
||||
assigned devices happens, we will find the vCPU on the list to
|
||||
wakeup.
|
@ -1,215 +0,0 @@
|
||||
KVM Lock Overview
|
||||
=================
|
||||
|
||||
1. Acquisition Orders
|
||||
---------------------
|
||||
|
||||
The acquisition orders for mutexes are as follows:
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
||||
2: Exception
|
||||
------------
|
||||
|
||||
Fast page fault:
|
||||
|
||||
Fast page fault is the fast path which fixes the guest page fault out of
|
||||
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
||||
following two cases:
|
||||
|
||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||
restore the saved R/X bits. This is described in more detail later below.
|
||||
|
||||
2. Write-Protection: The SPTE is present and the fault is
|
||||
caused by write-protect. That means we just need to change the W bit of the
|
||||
spte.
|
||||
|
||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||
SPTE_MMU_WRITEABLE bit on the spte:
|
||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||
page write-protection.
|
||||
|
||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||
is safe because whenever changing these bits can be detected by cmpxchg.
|
||||
|
||||
But we need carefully check these cases:
|
||||
1): The mapping from gfn to pfn
|
||||
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
||||
is not changed during cmpxchg. This is a ABA problem, for example, below case
|
||||
will happen:
|
||||
|
||||
At the beginning:
|
||||
gpte = gfn1
|
||||
gfn1 is mapped to pfn1 on host
|
||||
spte is the shadow page table entry corresponding with gpte and
|
||||
spte = pfn1
|
||||
|
||||
VCPU 0 VCPU0
|
||||
on fast page fault path:
|
||||
|
||||
old_spte = *spte;
|
||||
pfn1 is swapped out:
|
||||
spte = 0;
|
||||
|
||||
pfn1 is re-alloced for gfn2.
|
||||
|
||||
gpte is changed to point to
|
||||
gfn2 by the guest:
|
||||
spte = pfn1;
|
||||
|
||||
if (cmpxchg(spte, old_spte, old_spte+W)
|
||||
mark_page_dirty(vcpu->kvm, gfn1)
|
||||
OOPS!!!
|
||||
|
||||
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
||||
|
||||
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
||||
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
||||
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
||||
- We have held the refcount of pfn that means the pfn can not be freed and
|
||||
be reused for another gfn.
|
||||
- The pfn is writable that means it can not be shared between different gfns
|
||||
by KSM.
|
||||
|
||||
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
|
||||
|
||||
Currently, to simplify the whole things, we disable fast page fault for
|
||||
indirect shadow page.
|
||||
|
||||
2): Dirty bit tracking
|
||||
In the origin code, the spte can be fast updated (non-atomically) if the
|
||||
spte is read-only and the Accessed bit has already been set since the
|
||||
Accessed bit and Dirty bit can not be lost.
|
||||
|
||||
But it is not true after fast page fault since the spte can be marked
|
||||
writable between reading spte and updating spte. Like below case:
|
||||
|
||||
At the beginning:
|
||||
spte.W = 0
|
||||
spte.Accessed = 1
|
||||
|
||||
VCPU 0 VCPU0
|
||||
In mmu_spte_clear_track_bits():
|
||||
|
||||
old_spte = *spte;
|
||||
|
||||
/* 'if' condition is satisfied. */
|
||||
if (old_spte.Accessed == 1 &&
|
||||
old_spte.W == 0)
|
||||
spte = 0ull;
|
||||
on fast page fault path:
|
||||
spte.W = 1
|
||||
memory write on the spte:
|
||||
spte.Dirty = 1
|
||||
|
||||
|
||||
else
|
||||
old_spte = xchg(spte, 0ull)
|
||||
|
||||
|
||||
if (old_spte.Accessed == 1)
|
||||
kvm_set_pfn_accessed(spte.pfn);
|
||||
if (old_spte.Dirty == 1)
|
||||
kvm_set_pfn_dirty(spte.pfn);
|
||||
OOPS!!!
|
||||
|
||||
The Dirty bit is lost in this case.
|
||||
|
||||
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
||||
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
|
||||
the spte is always atomically updated in this case.
|
||||
|
||||
3): flush tlbs due to spte updated
|
||||
If the spte is updated from writable to readonly, we should flush all TLBs,
|
||||
otherwise rmap_write_protect will find a read-only spte, even though the
|
||||
writable spte might be cached on a CPU's TLB.
|
||||
|
||||
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
||||
fast page fault path, in order to easily audit the path, we see if TLBs need
|
||||
be flushed caused by this reason in mmu_spte_update() since this is a common
|
||||
function to update spte (present -> present).
|
||||
|
||||
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
||||
atomically update the spte, the race caused by fast page fault can be avoided,
|
||||
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
||||
|
||||
Lockless Access Tracking:
|
||||
|
||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||
a fault is generated and the fast page fault mechanism described above is used
|
||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||
the PTE is marked for access tracking and during restoration to the Present
|
||||
state, the W bit is set depending on whether or not it was a write access. If
|
||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||
which time it will be set using the Dirty tracking mechanism described above.
|
||||
|
||||
3. Reference
|
||||
------------
|
||||
|
||||
Name: kvm_lock
|
||||
Type: mutex
|
||||
Arch: any
|
||||
Protects: - vm_list
|
||||
|
||||
Name: kvm_count_lock
|
||||
Type: raw_spinlock_t
|
||||
Arch: any
|
||||
Protects: - hardware virtualization enable/disable
|
||||
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
||||
migration.
|
||||
|
||||
Name: kvm_arch::tsc_write_lock
|
||||
Type: raw_spinlock
|
||||
Arch: x86
|
||||
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
||||
- tsc offset in vmcb
|
||||
Comment: 'raw' because updating the tsc offsets must not be preempted.
|
||||
|
||||
Name: kvm->mmu_lock
|
||||
Type: spinlock_t
|
||||
Arch: any
|
||||
Protects: -shadow page/shadow tlb entry
|
||||
Comment: it is a spinlock since it is used in mmu notifier.
|
||||
|
||||
Name: kvm->srcu
|
||||
Type: srcu lock
|
||||
Arch: any
|
||||
Protects: - kvm->memslots
|
||||
- kvm->buses
|
||||
Comment: The srcu read lock must be held while accessing memslots (e.g.
|
||||
when using gfn_to_* functions) and while accessing in-kernel
|
||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||
if it is needed by multiple functions.
|
||||
|
||||
Name: blocked_vcpu_on_cpu_lock
|
||||
Type: spinlock_t
|
||||
Arch: x86
|
||||
Protects: blocked_vcpu_on_cpu
|
||||
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||
When VT-d posted-interrupts is supported and the VM has assigned
|
||||
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
||||
wakeup notification event since external interrupts from the
|
||||
assigned devices happens, we will find the vCPU on the list to
|
||||
wakeup.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================
|
||||
The x86 kvm shadow mmu
|
||||
======================
|
||||
|
||||
@ -7,27 +10,37 @@ physical addresses to host physical addresses.
|
||||
|
||||
The mmu code attempts to satisfy the following requirements:
|
||||
|
||||
- correctness: the guest should not be able to determine that it is running
|
||||
- correctness:
|
||||
the guest should not be able to determine that it is running
|
||||
on an emulated mmu except for timing (we attempt to comply
|
||||
with the specification, not emulate the characteristics of
|
||||
a particular implementation such as tlb size)
|
||||
- security: the guest must not be able to touch host memory not assigned
|
||||
- security:
|
||||
the guest must not be able to touch host memory not assigned
|
||||
to it
|
||||
- performance: minimize the performance penalty imposed by the mmu
|
||||
- scaling: need to scale to large memory and large vcpu guests
|
||||
- hardware: support the full range of x86 virtualization hardware
|
||||
- integration: Linux memory management code must be in control of guest memory
|
||||
- performance:
|
||||
minimize the performance penalty imposed by the mmu
|
||||
- scaling:
|
||||
need to scale to large memory and large vcpu guests
|
||||
- hardware:
|
||||
support the full range of x86 virtualization hardware
|
||||
- integration:
|
||||
Linux memory management code must be in control of guest memory
|
||||
so that swapping, page migration, page merging, transparent
|
||||
hugepages, and similar features work without change
|
||||
- dirty tracking: report writes to guest memory to enable live migration
|
||||
- dirty tracking:
|
||||
report writes to guest memory to enable live migration
|
||||
and framebuffer-based displays
|
||||
- footprint: keep the amount of pinned kernel memory low (most memory
|
||||
- footprint:
|
||||
keep the amount of pinned kernel memory low (most memory
|
||||
should be shrinkable)
|
||||
- reliability: avoid multipage or GFP_ATOMIC allocations
|
||||
- reliability:
|
||||
avoid multipage or GFP_ATOMIC allocations
|
||||
|
||||
Acronyms
|
||||
========
|
||||
|
||||
==== ====================================================================
|
||||
pfn host page frame number
|
||||
hpa host physical address
|
||||
hva host virtual address
|
||||
@ -41,6 +54,7 @@ pte page table entry (used also to refer generically to paging structure
|
||||
gpte guest pte (referring to gfns)
|
||||
spte shadow pte (referring to pfns)
|
||||
tdp two dimensional paging (vendor neutral term for NPT and EPT)
|
||||
==== ====================================================================
|
||||
|
||||
Virtual and real hardware supported
|
||||
===================================
|
||||
@ -90,11 +104,13 @@ Events
|
||||
The mmu is driven by events, some from the guest, some from the host.
|
||||
|
||||
Guest generated events:
|
||||
|
||||
- writes to control registers (especially cr3)
|
||||
- invlpg/invlpga instruction execution
|
||||
- access to missing or protected translations
|
||||
|
||||
Host generated events:
|
||||
|
||||
- changes in the gpa->hpa translation (either through gpa->hva changes or
|
||||
through hva->hpa changes)
|
||||
- memory pressure (the shrinker)
|
||||
@ -117,16 +133,19 @@ Leaf ptes point at guest pages.
|
||||
The following table shows translations encoded by leaf ptes, with higher-level
|
||||
translations in parentheses:
|
||||
|
||||
Non-nested guests:
|
||||
Non-nested guests::
|
||||
|
||||
nonpaging: gpa->hpa
|
||||
paging: gva->gpa->hpa
|
||||
paging, tdp: (gva->)gpa->hpa
|
||||
Nested guests:
|
||||
|
||||
Nested guests::
|
||||
|
||||
non-tdp: ngva->gpa->hpa (*)
|
||||
tdp: (ngva->)ngpa->gpa->hpa
|
||||
|
||||
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
||||
tables if npt is not present
|
||||
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
||||
tables if npt is not present
|
||||
|
||||
Shadow pages contain the following information:
|
||||
role.level:
|
||||
@ -291,28 +310,41 @@ Handling a page fault is performed as follows:
|
||||
|
||||
- if the RSV bit of the error code is set, the page fault is caused by guest
|
||||
accessing MMIO and cached MMIO information is available.
|
||||
|
||||
- walk shadow page table
|
||||
- check for valid generation number in the spte (see "Fast invalidation of
|
||||
MMIO sptes" below)
|
||||
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
|
||||
vcpu->arch.mmio_gfn, and call the emulator
|
||||
|
||||
- If both P bit and R/W bit of error code are set, this could possibly
|
||||
be handled as a "fast page fault" (fixed without taking the MMU lock). See
|
||||
the description in Documentation/virt/kvm/locking.txt.
|
||||
|
||||
- if needed, walk the guest page tables to determine the guest translation
|
||||
(gva->gpa or ngpa->gpa)
|
||||
|
||||
- if permissions are insufficient, reflect the fault back to the guest
|
||||
|
||||
- determine the host page
|
||||
|
||||
- if this is an mmio request, there is no host page; cache the info to
|
||||
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
|
||||
|
||||
- walk the shadow page table to find the spte for the translation,
|
||||
instantiating missing intermediate page tables as necessary
|
||||
|
||||
- If this is an mmio request, cache the mmio info to the spte and set some
|
||||
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
|
||||
|
||||
- try to unsynchronize the page
|
||||
|
||||
- if successful, we can let the guest continue and modify the gpte
|
||||
|
||||
- emulate the instruction
|
||||
|
||||
- if failed, unshadow the page and let the guest continue
|
||||
|
||||
- update any translations that were modified by the instruction
|
||||
|
||||
invlpg handling:
|
||||
@ -324,10 +356,12 @@ invlpg handling:
|
||||
Guest control register updates:
|
||||
|
||||
- mov to cr3
|
||||
|
||||
- look up new shadow roots
|
||||
- synchronize newly reachable shadow pages
|
||||
|
||||
- mov to cr0/cr4/efer
|
||||
|
||||
- set up mmu context for new paging mode
|
||||
- look up new shadow roots
|
||||
- synchronize newly reachable shadow pages
|
||||
@ -358,6 +392,7 @@ on fault type:
|
||||
(user write faults generate a #PF)
|
||||
|
||||
In the first case there are two additional complications:
|
||||
|
||||
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
|
||||
the kernel may now execute it. We handle this by also setting spte.nx.
|
||||
If we get a user fetch or read fault, we'll change spte.u=1 and
|
||||
@ -446,4 +481,3 @@ Further reading
|
||||
|
||||
- NPT presentation from KVM Forum 2008
|
||||
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
|
||||
|
@ -1,6 +1,10 @@
|
||||
KVM-specific MSRs.
|
||||
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
||||
=====================================================
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
KVM-specific MSRs
|
||||
=================
|
||||
|
||||
:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
||||
|
||||
KVM makes use of some custom MSRs to service some requests.
|
||||
|
||||
@ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from
|
||||
but they are deprecated and their use is discouraged.
|
||||
|
||||
Custom MSR list
|
||||
--------
|
||||
---------------
|
||||
|
||||
The current supported Custom MSR list is:
|
||||
|
||||
MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
|
||||
MSR_KVM_WALL_CLOCK_NEW:
|
||||
0x4b564d00
|
||||
|
||||
data: 4-byte alignment physical address of a memory area which must be
|
||||
data:
|
||||
4-byte alignment physical address of a memory area which must be
|
||||
in guest RAM. This memory is expected to hold a copy of the following
|
||||
structure:
|
||||
structure::
|
||||
|
||||
struct pvclock_wall_clock {
|
||||
struct pvclock_wall_clock {
|
||||
u32 version;
|
||||
u32 sec;
|
||||
u32 nsec;
|
||||
} __attribute__((__packed__));
|
||||
} __attribute__((__packed__));
|
||||
|
||||
whose data will be filled in by the hypervisor. The hypervisor is only
|
||||
guaranteed to update this data at the moment of MSR write.
|
||||
Users that want to reliably query this information more than once have
|
||||
to write more than once to this MSR. Fields have the following meanings:
|
||||
|
||||
version: guest has to check version before and after grabbing
|
||||
version:
|
||||
guest has to check version before and after grabbing
|
||||
time information and check that they are both equal and even.
|
||||
An odd version indicates an in-progress update.
|
||||
|
||||
sec: number of seconds for wallclock at time of boot.
|
||||
sec:
|
||||
number of seconds for wallclock at time of boot.
|
||||
|
||||
nsec: number of nanoseconds for wallclock at time of boot.
|
||||
nsec:
|
||||
number of nanoseconds for wallclock at time of boot.
|
||||
|
||||
In order to get the current wallclock time, the system_time from
|
||||
MSR_KVM_SYSTEM_TIME_NEW needs to be added.
|
||||
@ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
|
||||
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
MSR_KVM_SYSTEM_TIME_NEW:
|
||||
0x4b564d01
|
||||
|
||||
data: 4-byte aligned physical address of a memory area which must be in
|
||||
data:
|
||||
4-byte aligned physical address of a memory area which must be in
|
||||
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
|
||||
a copy of the following structure:
|
||||
a copy of the following structure::
|
||||
|
||||
struct pvclock_vcpu_time_info {
|
||||
struct pvclock_vcpu_time_info {
|
||||
u32 version;
|
||||
u32 pad0;
|
||||
u64 tsc_timestamp;
|
||||
@ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
s8 tsc_shift;
|
||||
u8 flags;
|
||||
u8 pad[2];
|
||||
} __attribute__((__packed__)); /* 32 bytes */
|
||||
} __attribute__((__packed__)); /* 32 bytes */
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
@ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: guest has to check version before and after grabbing
|
||||
version:
|
||||
guest has to check version before and after grabbing
|
||||
time information and check that they are both equal and even.
|
||||
An odd version indicates an in-progress update.
|
||||
|
||||
tsc_timestamp: the tsc value at the current VCPU at the time
|
||||
tsc_timestamp:
|
||||
the tsc value at the current VCPU at the time
|
||||
of the update of this structure. Guests can subtract this value
|
||||
from current tsc to derive a notion of elapsed time since the
|
||||
structure update.
|
||||
|
||||
system_time: a host notion of monotonic time, including sleep
|
||||
system_time:
|
||||
a host notion of monotonic time, including sleep
|
||||
time at the time this structure was last updated. Unit is
|
||||
nanoseconds.
|
||||
|
||||
tsc_to_system_mul: multiplier to be used when converting
|
||||
tsc_to_system_mul:
|
||||
multiplier to be used when converting
|
||||
tsc-related quantity to nanoseconds
|
||||
|
||||
tsc_shift: shift to be used when converting tsc-related
|
||||
tsc_shift:
|
||||
shift to be used when converting tsc-related
|
||||
quantity to nanoseconds. This shift will ensure that
|
||||
multiplication with tsc_to_system_mul does not overflow.
|
||||
A positive value denotes a left shift, a negative value
|
||||
@ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
|
||||
The conversion from tsc to nanoseconds involves an additional
|
||||
right shift by 32 bits. With this information, guests can
|
||||
derive per-CPU time by doing:
|
||||
derive per-CPU time by doing::
|
||||
|
||||
time = (current_tsc - tsc_timestamp)
|
||||
if (tsc_shift >= 0)
|
||||
@ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
||||
time = (time * tsc_to_system_mul) >> 32
|
||||
time = time + system_time
|
||||
|
||||
flags: bits in this field indicate extended capabilities
|
||||
flags:
|
||||
bits in this field indicate extended capabilities
|
||||
coordinated between the guest and the hypervisor. Availability
|
||||
of specific flags has to be checked in 0x40000001 cpuid leaf.
|
||||
Current flags are:
|
||||
|
||||
flag bit | cpuid bit | meaning
|
||||
-------------------------------------------------------------
|
||||
| | time measures taken across
|
||||
0 | 24 | multiple cpus are guaranteed to
|
||||
| | be monotonic
|
||||
-------------------------------------------------------------
|
||||
| | guest vcpu has been paused by
|
||||
1 | N/A | the host
|
||||
| | See 4.70 in api.txt
|
||||
-------------------------------------------------------------
|
||||
|
||||
+-----------+--------------+----------------------------------+
|
||||
| flag bit | cpuid bit | meaning |
|
||||
+-----------+--------------+----------------------------------+
|
||||
| | | time measures taken across |
|
||||
| 0 | 24 | multiple cpus are guaranteed to |
|
||||
| | | be monotonic |
|
||||
+-----------+--------------+----------------------------------+
|
||||
| | | guest vcpu has been paused by |
|
||||
| 1 | N/A | the host |
|
||||
| | | See 4.70 in api.txt |
|
||||
+-----------+--------------+----------------------------------+
|
||||
|
||||
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
|
||||
MSR_KVM_WALL_CLOCK: 0x11
|
||||
MSR_KVM_WALL_CLOCK:
|
||||
0x11
|
||||
|
||||
data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
||||
data and functioning:
|
||||
same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
||||
|
||||
This MSR falls outside the reserved KVM range and may be removed in the
|
||||
future. Its usage is deprecated.
|
||||
@ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK: 0x11
|
||||
Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
MSR_KVM_SYSTEM_TIME: 0x12
|
||||
MSR_KVM_SYSTEM_TIME:
|
||||
0x12
|
||||
|
||||
data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
||||
data and functioning:
|
||||
same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
||||
|
||||
This MSR falls outside the reserved KVM range and may be removed in the
|
||||
future. Its usage is deprecated.
|
||||
@ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
||||
Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
|
||||
leaf prior to usage.
|
||||
|
||||
The suggested algorithm for detecting kvmclock presence is then:
|
||||
The suggested algorithm for detecting kvmclock presence is then::
|
||||
|
||||
if (!kvm_para_available()) /* refer to cpuid.txt */
|
||||
return NON_PRESENT;
|
||||
@ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
||||
} else
|
||||
return NON_PRESENT;
|
||||
|
||||
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
data: Bits 63-6 hold 64-byte aligned physical address of a
|
||||
MSR_KVM_ASYNC_PF_EN:
|
||||
0x4b564d02
|
||||
|
||||
data:
|
||||
Bits 63-6 hold 64-byte aligned physical address of a
|
||||
64 byte memory area which must be in guest RAM and must be
|
||||
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
||||
when asynchronous page faults are enabled on the vcpu 0 when
|
||||
@ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
Currently type 2 APF will be always delivered on the same vcpu as
|
||||
type 1 was, but guest should not rely on that.
|
||||
|
||||
MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
MSR_KVM_STEAL_TIME:
|
||||
0x4b564d03
|
||||
|
||||
data: 64-byte alignment physical address of a memory area which must be
|
||||
data:
|
||||
64-byte alignment physical address of a memory area which must be
|
||||
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
||||
hold a copy of the following structure:
|
||||
hold a copy of the following structure::
|
||||
|
||||
struct kvm_steal_time {
|
||||
struct kvm_steal_time {
|
||||
__u64 steal;
|
||||
__u32 version;
|
||||
__u32 flags;
|
||||
__u8 preempted;
|
||||
__u8 u8_pad[3];
|
||||
__u32 pad[11];
|
||||
}
|
||||
}
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
@ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: a sequence counter. In other words, guest has to check
|
||||
version:
|
||||
a sequence counter. In other words, guest has to check
|
||||
this field before and after grabbing time information and make
|
||||
sure they are both equal and even. An odd version indicates an
|
||||
in-progress update.
|
||||
|
||||
flags: At this point, always zero. May be used to indicate
|
||||
flags:
|
||||
At this point, always zero. May be used to indicate
|
||||
changes in this structure in the future.
|
||||
|
||||
steal: the amount of time in which this vCPU did not run, in
|
||||
steal:
|
||||
the amount of time in which this vCPU did not run, in
|
||||
nanoseconds. Time during which the vcpu is idle, will not be
|
||||
reported as steal time.
|
||||
|
||||
preempted: indicate the vCPU who owns this struct is running or
|
||||
preempted:
|
||||
indicate the vCPU who owns this struct is running or
|
||||
not. Non-zero values mean the vCPU has been preempted. Zero
|
||||
means the vCPU is not preempted. NOTE, it is always zero if the
|
||||
the hypervisor doesn't support this field.
|
||||
|
||||
MSR_KVM_EOI_EN: 0x4b564d04
|
||||
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
||||
MSR_KVM_EOI_EN:
|
||||
0x4b564d04
|
||||
|
||||
data:
|
||||
Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
||||
when disabled. Bit 1 is reserved and must be zero. When PV end of
|
||||
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
|
||||
physical address of a 4 byte memory area which must be in guest RAM and
|
||||
@ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04
|
||||
clear it using a single CPU instruction, such as test and clear, or
|
||||
compare and exchange.
|
||||
|
||||
MSR_KVM_POLL_CONTROL: 0x4b564d05
|
||||
MSR_KVM_POLL_CONTROL:
|
||||
0x4b564d05
|
||||
|
||||
Control host-side polling.
|
||||
|
||||
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
||||
data:
|
||||
Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
||||
|
||||
KVM guests can request the host not to poll on HLT, for example if
|
||||
they are performing polling themselves.
|
||||
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========
|
||||
Nested VMX
|
||||
==========
|
||||
|
||||
@ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default
|
||||
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
||||
explicitly enabled, by giving qemu one of the following options:
|
||||
|
||||
-cpu host (emulated CPU has all features of the real CPU)
|
||||
- cpu host (emulated CPU has all features of the real CPU)
|
||||
|
||||
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
- cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
|
||||
|
||||
ABIs
|
||||
@ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions.
|
||||
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
||||
struct shadow_vmcs is ever changed.
|
||||
|
||||
::
|
||||
|
||||
typedef u64 natural_width;
|
||||
struct __packed vmcs12 {
|
||||
/* According to the Intel spec, a VMCS region must start with
|
||||
@ -220,21 +225,21 @@ Authors
|
||||
-------
|
||||
|
||||
These patches were written by:
|
||||
Abel Gordon, abelg <at> il.ibm.com
|
||||
Nadav Har'El, nyh <at> il.ibm.com
|
||||
Orit Wasserman, oritw <at> il.ibm.com
|
||||
Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
- Abel Gordon, abelg <at> il.ibm.com
|
||||
- Nadav Har'El, nyh <at> il.ibm.com
|
||||
- Orit Wasserman, oritw <at> il.ibm.com
|
||||
- Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
- Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
|
||||
With contributions by:
|
||||
Anthony Liguori, aliguori <at> us.ibm.com
|
||||
Mike Day, mdday <at> us.ibm.com
|
||||
Michael Factor, factor <at> il.ibm.com
|
||||
Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
- Anthony Liguori, aliguori <at> us.ibm.com
|
||||
- Mike Day, mdday <at> us.ibm.com
|
||||
- Michael Factor, factor <at> il.ibm.com
|
||||
- Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
|
||||
And valuable reviews by:
|
||||
Avi Kivity, avi <at> redhat.com
|
||||
Gleb Natapov, gleb <at> redhat.com
|
||||
Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
Kevin Tian, kevin.tian <at> intel.com
|
||||
and others.
|
||||
- Avi Kivity, avi <at> redhat.com
|
||||
- Gleb Natapov, gleb <at> redhat.com
|
||||
- Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
- Kevin Tian, kevin.tian <at> intel.com
|
||||
- and others.
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================================
|
||||
The PPC KVM paravirtual interface
|
||||
=================================
|
||||
|
||||
@ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions.
|
||||
|
||||
The parameters are as follows:
|
||||
|
||||
======== ================ ================
|
||||
Register IN OUT
|
||||
|
||||
======== ================ ================
|
||||
r0 - volatile
|
||||
r3 1st parameter Return code
|
||||
r4 2nd parameter 1st output value
|
||||
@ -47,6 +51,7 @@ The parameters are as follows:
|
||||
r10 8th parameter 7th output value
|
||||
r11 hypercall number 8th output value
|
||||
r12 - volatile
|
||||
======== ================ ================
|
||||
|
||||
Hypercall definitions are shared in generic code, so the same hypercall numbers
|
||||
apply for x86 and powerpc alike with the exception that each KVM hypercall
|
||||
@ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16).
|
||||
|
||||
Return codes can be as follows:
|
||||
|
||||
==== =========================
|
||||
Code Meaning
|
||||
|
||||
==== =========================
|
||||
0 Success
|
||||
12 Hypercall not implemented
|
||||
<0 Error
|
||||
==== =========================
|
||||
|
||||
The magic page
|
||||
==============
|
||||
@ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the
|
||||
MMU is enabled. The second parameter indicates the address in real mode, if
|
||||
applicable to the target. For now, we always map the page to -4096. This way we
|
||||
can access it using absolute load and store functions. The following
|
||||
instruction reads the first field of the magic page:
|
||||
instruction reads the first field of the magic page::
|
||||
|
||||
ld rX, -4096(0)
|
||||
|
||||
@ -93,8 +100,10 @@ a bitmap of available features inside the magic page.
|
||||
|
||||
The following enhancements to the magic page are currently available:
|
||||
|
||||
============================ =======================================
|
||||
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
|
||||
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
|
||||
============================ =======================================
|
||||
|
||||
For enhanced features in the magic page, please check for the existence of the
|
||||
feature before using them!
|
||||
@ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior.
|
||||
|
||||
The following bits are safe to be set inside the guest:
|
||||
|
||||
MSR_EE
|
||||
MSR_RI
|
||||
- MSR_EE
|
||||
- MSR_RI
|
||||
|
||||
If any other bit changes in the MSR, please still use mtmsr(d).
|
||||
|
||||
@ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps
|
||||
also act on the shared page. So calling privileged instructions still works as
|
||||
before.
|
||||
|
||||
======================= ================================
|
||||
From To
|
||||
==== ==
|
||||
|
||||
======================= ================================
|
||||
mfmsr rX ld rX, magic_page->msr
|
||||
mfsprg rX, 0 ld rX, magic_page->sprg0
|
||||
mfsprg rX, 1 ld rX, magic_page->sprg1
|
||||
@ -173,7 +182,7 @@ mtsrin rX, rY b <special mtsrin section>
|
||||
|
||||
[BookE only]
|
||||
wrteei [0|1] b <special wrteei section>
|
||||
|
||||
======================= ================================
|
||||
|
||||
Some instructions require more logic to determine what's going on than a load
|
||||
or store instruction can deliver. To enable patching of those, we keep some
|
||||
@ -191,6 +200,7 @@ for example.
|
||||
|
||||
Hypercall ABIs in KVM on PowerPC
|
||||
=================================
|
||||
|
||||
1) KVM hypercalls (ePAPR)
|
||||
|
||||
These are ePAPR compliant hypercall implementation (mentioned above). Even
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
================================
|
||||
Review checklist for kvm patches
|
||||
================================
|
||||
|
@ -1,3 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=============================
|
||||
The s390 DIAGNOSE call on KVM
|
||||
=============================
|
||||
|
||||
@ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies
|
||||
all supported DIAGNOSE calls need to be handled by either KVM or its
|
||||
userspace.
|
||||
|
||||
All DIAGNOSE calls supported by KVM use the RS-a format:
|
||||
All DIAGNOSE calls supported by KVM use the RS-a format::
|
||||
|
||||
--------------------------------------
|
||||
| '83' | R1 | R3 | B2 | D2 |
|
||||
--------------------------------------
|
||||
0 8 12 16 20 31
|
||||
--------------------------------------
|
||||
| '83' | R1 | R3 | B2 | D2 |
|
||||
--------------------------------------
|
||||
0 8 12 16 20 31
|
||||
|
||||
The second-operand address (obtained by the base/displacement calculation)
|
||||
is not used to address data. Instead, bits 48-63 of this address specify
|
@ -1,17 +1,21 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Timekeeping Virtualization for X86-Based Architectures
|
||||
======================================================
|
||||
Timekeeping Virtualization for X86-Based Architectures
|
||||
======================================================
|
||||
|
||||
Zachary Amsden <zamsden@redhat.com>
|
||||
Copyright (c) 2010, Red Hat. All rights reserved.
|
||||
:Author: Zachary Amsden <zamsden@redhat.com>
|
||||
:Copyright: (c) 2010, Red Hat. All rights reserved.
|
||||
|
||||
1) Overview
|
||||
2) Timing Devices
|
||||
3) TSC Hardware
|
||||
4) Virtualization Problems
|
||||
.. Contents
|
||||
|
||||
=========================================================================
|
||||
1) Overview
|
||||
2) Timing Devices
|
||||
3) TSC Hardware
|
||||
4) Virtualization Problems
|
||||
|
||||
1) Overview
|
||||
1. Overview
|
||||
===========
|
||||
|
||||
One of the most complicated parts of the X86 platform, and specifically,
|
||||
the virtualization of this platform is the plethora of timing devices available
|
||||
@ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to
|
||||
timekeeping which may be difficult to find elsewhere, specifically,
|
||||
information relevant to KVM and hardware-based virtualization.
|
||||
|
||||
=========================================================================
|
||||
|
||||
2) Timing Devices
|
||||
2. Timing Devices
|
||||
=================
|
||||
|
||||
First we discuss the basic hardware devices available. TSC and the related
|
||||
KVM clock are special enough to warrant a full exposition and are described in
|
||||
the following section.
|
||||
|
||||
2.1) i8254 - PIT
|
||||
2.1. i8254 - PIT
|
||||
----------------
|
||||
|
||||
One of the first timer devices available is the programmable interrupt timer,
|
||||
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
|
||||
@ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
|
||||
using single or multiple byte access to the I/O ports. There are 6 modes
|
||||
available, but not all modes are available to all timers, as only timer 2
|
||||
has a connected gate input, required for modes 1 and 5. The gate line is
|
||||
controlled by port 61h, bit 0, as illustrated in the following diagram.
|
||||
controlled by port 61h, bit 0, as illustrated in the following diagram::
|
||||
|
||||
-------------- ----------------
|
||||
| | | |
|
||||
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
|
||||
| Clock | | | |
|
||||
-------------- | +->| GATE TIMER 0 |
|
||||
-------------- ----------------
|
||||
| | | |
|
||||
| 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
|
||||
| Clock | | | |
|
||||
-------------- | +->| GATE TIMER 0 |
|
||||
| ----------------
|
||||
|
|
||||
| ----------------
|
||||
@ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram.
|
||||
| | |
|
||||
|------>| CLOCK OUT | ---------> Port 61h, bit 5
|
||||
| | |
|
||||
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
|
||||
Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
|
||||
---------------- _| )--|LPF|---Speaker
|
||||
/ *---- \___/
|
||||
Port 61h, bit 1 -----------------------------------/
|
||||
Port 61h, bit 1 ---------------------------------/
|
||||
|
||||
The timer modes are now described.
|
||||
|
||||
Mode 0: Single Timeout. This is a one-shot software timeout that counts down
|
||||
Mode 0: Single Timeout.
|
||||
This is a one-shot software timeout that counts down
|
||||
when the gate is high (always true for timers 0 and 1). When the count
|
||||
reaches zero, the output goes high.
|
||||
|
||||
Mode 1: Triggered One-shot. The output is initially set high. When the gate
|
||||
Mode 1: Triggered One-shot.
|
||||
The output is initially set high. When the gate
|
||||
line is set high, a countdown is initiated (which does not stop if the gate is
|
||||
lowered), during which the output is set low. When the count reaches zero,
|
||||
the output goes high.
|
||||
|
||||
Mode 2: Rate Generator. The output is initially set high. When the countdown
|
||||
Mode 2: Rate Generator.
|
||||
The output is initially set high. When the countdown
|
||||
reaches 1, the output goes low for one count and then returns high. The value
|
||||
is reloaded and the countdown automatically resumes. If the gate line goes
|
||||
low, the count is halted. If the output is low when the gate is lowered, the
|
||||
output automatically goes high (this only affects timer 2).
|
||||
|
||||
Mode 3: Square Wave. This generates a high / low square wave. The count
|
||||
Mode 3: Square Wave.
|
||||
This generates a high / low square wave. The count
|
||||
determines the length of the pulse, which alternates between high and low
|
||||
when zero is reached. The count only proceeds when gate is high and is
|
||||
automatically reloaded on reaching zero. The count is decremented twice at
|
||||
@ -103,12 +111,14 @@ Mode 3: Square Wave. This generates a high / low square wave. The count
|
||||
values are not observed when reading. This is the intended mode for timer 2,
|
||||
which generates sine-like tones by low-pass filtering the square wave output.
|
||||
|
||||
Mode 4: Software Strobe. After programming this mode and loading the counter,
|
||||
Mode 4: Software Strobe.
|
||||
After programming this mode and loading the counter,
|
||||
the output remains high until the counter reaches zero. Then the output
|
||||
goes low for 1 clock cycle and returns high. The counter is not reloaded.
|
||||
Counting only occurs when gate is high.
|
||||
|
||||
Mode 5: Hardware Strobe. After programming and loading the counter, the
|
||||
Mode 5: Hardware Strobe.
|
||||
After programming and loading the counter, the
|
||||
output remains high. When the gate is raised, a countdown is initiated
|
||||
(which does not stop if the gate is lowered). When the counter reaches zero,
|
||||
the output goes low for 1 clock cycle and then returns high. The counter is
|
||||
@ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting. The
|
||||
command port, 0x43 is used to set the counter and mode for each of the three
|
||||
timers.
|
||||
|
||||
PIT commands, issued to port 0x43, using the following bit encoding:
|
||||
PIT commands, issued to port 0x43, using the following bit encoding::
|
||||
|
||||
Bit 7-4: Command (See table below)
|
||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||
Bit 0 : Binary (0) / BCD (1)
|
||||
Bit 7-4: Command (See table below)
|
||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||
Bit 0 : Binary (0) / BCD (1)
|
||||
|
||||
Command table:
|
||||
Command table::
|
||||
|
||||
0000 - Latch Timer 0 count for port 0x40
|
||||
0000 - Latch Timer 0 count for port 0x40
|
||||
sample and hold the count to be read in port 0x40;
|
||||
additional commands ignored until counter is read;
|
||||
mode bits ignored.
|
||||
|
||||
0001 - Set Timer 0 LSB mode for port 0x40
|
||||
0001 - Set Timer 0 LSB mode for port 0x40
|
||||
set timer to read LSB only and force MSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0010 - Set Timer 0 MSB mode for port 0x40
|
||||
0010 - Set Timer 0 MSB mode for port 0x40
|
||||
set timer to read MSB only and force LSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||
set timer to read / write LSB first, then MSB;
|
||||
mode bits set timer mode
|
||||
|
||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||
|
||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
|
||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
|
||||
|
||||
1101 - General counter latch
|
||||
1101 - General counter latch
|
||||
Latch combination of counters into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
Bit 1 = Counter 0
|
||||
Bit 0 = Unused
|
||||
|
||||
1110 - Latch timer status
|
||||
1110 - Latch timer status
|
||||
Latch combination of counter mode into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
@ -177,7 +187,8 @@ Command table:
|
||||
Bit 3-1 = Mode
|
||||
Bit 0 = Binary (0) / BCD mode (1)
|
||||
|
||||
2.2) RTC
|
||||
2.2. RTC
|
||||
--------
|
||||
|
||||
The second device which was available in the original PC was the MC146818 real
|
||||
time clock. The original device is now obsolete, and usually emulated by the
|
||||
@ -201,21 +212,21 @@ in progress, as indicated in the status register.
|
||||
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
|
||||
programmed to a 32kHz divider if the RTC is to count seconds.
|
||||
|
||||
This is the RAM map originally used for the RTC/CMOS:
|
||||
This is the RAM map originally used for the RTC/CMOS::
|
||||
|
||||
Location Size Description
|
||||
------------------------------------------
|
||||
00h byte Current second (BCD)
|
||||
01h byte Seconds alarm (BCD)
|
||||
02h byte Current minute (BCD)
|
||||
03h byte Minutes alarm (BCD)
|
||||
04h byte Current hour (BCD)
|
||||
05h byte Hours alarm (BCD)
|
||||
06h byte Current day of week (BCD)
|
||||
07h byte Current day of month (BCD)
|
||||
08h byte Current month (BCD)
|
||||
09h byte Current year (BCD)
|
||||
0Ah byte Register A
|
||||
Location Size Description
|
||||
------------------------------------------
|
||||
00h byte Current second (BCD)
|
||||
01h byte Seconds alarm (BCD)
|
||||
02h byte Current minute (BCD)
|
||||
03h byte Minutes alarm (BCD)
|
||||
04h byte Current hour (BCD)
|
||||
05h byte Hours alarm (BCD)
|
||||
06h byte Current day of week (BCD)
|
||||
07h byte Current day of month (BCD)
|
||||
08h byte Current month (BCD)
|
||||
09h byte Current year (BCD)
|
||||
0Ah byte Register A
|
||||
bit 7 = Update in progress
|
||||
bit 6-4 = Divider for clock
|
||||
000 = 4.194 MHz
|
||||
@ -234,7 +245,7 @@ Location Size Description
|
||||
1101 = 125 mS
|
||||
1110 = 250 mS
|
||||
1111 = 500 mS
|
||||
0Bh byte Register B
|
||||
0Bh byte Register B
|
||||
bit 7 = Run (0) / Halt (1)
|
||||
bit 6 = Periodic interrupt enable
|
||||
bit 5 = Alarm interrupt enable
|
||||
@ -243,19 +254,20 @@ Location Size Description
|
||||
bit 2 = BCD calendar (0) / Binary (1)
|
||||
bit 1 = 12-hour mode (0) / 24-hour mode (1)
|
||||
bit 0 = 0 (DST off) / 1 (DST enabled)
|
||||
OCh byte Register C (read only)
|
||||
OCh byte Register C (read only)
|
||||
bit 7 = interrupt request flag (IRQF)
|
||||
bit 6 = periodic interrupt flag (PF)
|
||||
bit 5 = alarm interrupt flag (AF)
|
||||
bit 4 = update interrupt flag (UF)
|
||||
bit 3-0 = reserved
|
||||
ODh byte Register D (read only)
|
||||
ODh byte Register D (read only)
|
||||
bit 7 = RTC has power
|
||||
bit 6-0 = reserved
|
||||
32h byte Current century BCD (*)
|
||||
32h byte Current century BCD (*)
|
||||
(*) location vendor specific and now determined from ACPI global tables
|
||||
|
||||
2.3) APIC
|
||||
2.3. APIC
|
||||
---------
|
||||
|
||||
On Pentium and later processors, an on-board timer is available to each CPU
|
||||
as part of the Advanced Programmable Interrupt Controller. The APIC is
|
||||
@ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable
|
||||
of one-shot or periodic operation, and is based on the bus clock divided down
|
||||
by the programmable divider register.
|
||||
|
||||
2.4) HPET
|
||||
2.4. HPET
|
||||
---------
|
||||
|
||||
HPET is quite complex, and was originally intended to replace the PIT / RTC
|
||||
support of the X86 PC. It remains to be seen whether that will be the case, as
|
||||
@ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS.
|
||||
Detailed specification of the HPET is beyond the current scope of this
|
||||
document, as it is also very well documented elsewhere.
|
||||
|
||||
2.5) Offboard Timers
|
||||
2.5. Offboard Timers
|
||||
--------------------
|
||||
|
||||
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
|
||||
timing chips built into the cards which may have registers which are accessible
|
||||
@ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game. Such a
|
||||
timer device would require additional support to be virtualized properly and is
|
||||
not considered important at this time as no known operating system does this.
|
||||
|
||||
=========================================================================
|
||||
|
||||
3) TSC Hardware
|
||||
3. TSC Hardware
|
||||
===============
|
||||
|
||||
The TSC or time stamp counter is relatively simple in theory; it counts
|
||||
instruction cycles issued by the processor, which can be used as a measure of
|
||||
@ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant. Newer implementations
|
||||
promise to allow the TSC to additionally be scaled, but this hardware is not
|
||||
yet widely available.
|
||||
|
||||
3.1) TSC synchronization
|
||||
3.1. TSC synchronization
|
||||
------------------------
|
||||
|
||||
The TSC is a CPU-local clock in most implementations. This means, on SMP
|
||||
platforms, the TSCs of different CPUs may start at different times depending
|
||||
@ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all
|
||||
values are read from the same clock, which generally only is possible on single
|
||||
socket systems or those with special hardware support.
|
||||
|
||||
3.2) TSC and CPU hotplug
|
||||
3.2. TSC and CPU hotplug
|
||||
------------------------
|
||||
|
||||
As touched on already, CPUs which arrive later than the boot time of the system
|
||||
may not have a TSC value that is synchronized with the rest of the system.
|
||||
@ -367,7 +382,8 @@ a guarantee. This can have the effect of bringing a system from a state where
|
||||
TSC is synchronized back to a state where TSC synchronization flaws, however
|
||||
small, may be exposed to the OS and any virtualization environment.
|
||||
|
||||
3.3) TSC and multi-socket / NUMA
|
||||
3.3. TSC and multi-socket / NUMA
|
||||
--------------------------------
|
||||
|
||||
Multi-socket systems, especially large multi-socket systems are likely to have
|
||||
individual clocksources rather than a single, universally distributed clock.
|
||||
@ -385,7 +401,8 @@ standards for telecommunications and computer equipment.
|
||||
It is recommended not to trust the TSCs to remain synchronized on NUMA or
|
||||
multiple socket systems for these reasons.
|
||||
|
||||
3.4) TSC and C-states
|
||||
3.4. TSC and C-states
|
||||
---------------------
|
||||
|
||||
C-states, or idling states of the processor, especially C1E and deeper sleep
|
||||
states may be problematic for TSC as well. The TSC may stop advancing in such
|
||||
@ -396,7 +413,8 @@ based on CPU and chipset identifications.
|
||||
The TSC in such a case may be corrected by catching it up to a known external
|
||||
clocksource.
|
||||
|
||||
3.5) TSC frequency change / P-states
|
||||
3.5. TSC frequency change / P-states
|
||||
------------------------------------
|
||||
|
||||
To make things slightly more interesting, some CPUs may change frequency. They
|
||||
may or may not run the TSC at the same rate, and because the frequency change
|
||||
@ -416,14 +434,16 @@ other processors. In such cases, the TSC on halted CPUs could advance faster
|
||||
than that of non-halted processors. AMD Turion processors are known to have
|
||||
this problem.
|
||||
|
||||
3.6) TSC and STPCLK / T-states
|
||||
3.6. TSC and STPCLK / T-states
|
||||
------------------------------
|
||||
|
||||
External signals given to the processor may also have the effect of stopping
|
||||
the TSC. This is typically done for thermal emergency power control to prevent
|
||||
an overheating condition, and typically, there is no way to detect that this
|
||||
condition has happened.
|
||||
|
||||
3.7) TSC virtualization - VMX
|
||||
3.7. TSC virtualization - VMX
|
||||
-----------------------------
|
||||
|
||||
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
@ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
|
||||
field specified in the VMCS. Special instructions must be used to read and
|
||||
write the VMCS field.
|
||||
|
||||
3.8) TSC virtualization - SVM
|
||||
3.8. TSC virtualization - SVM
|
||||
-----------------------------
|
||||
|
||||
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
addition, SVM allows passing through the host TSC plus an additional offset
|
||||
field specified in the SVM control block.
|
||||
|
||||
3.9) TSC feature bits in Linux
|
||||
3.9. TSC feature bits in Linux
|
||||
------------------------------
|
||||
|
||||
In summary, there is no way to guarantee the TSC remains in perfect
|
||||
synchronization unless it is explicitly guaranteed by the architecture. Even
|
||||
@ -448,13 +470,16 @@ despite being locally consistent.
|
||||
The following feature bits are used by Linux to signal various TSC attributes,
|
||||
but they can only be taken to be meaningful for UP or single node systems.
|
||||
|
||||
X86_FEATURE_TSC : The TSC is available in hardware
|
||||
X86_FEATURE_RDTSCP : The RDTSCP instruction is available
|
||||
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
|
||||
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
|
||||
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
|
||||
========================= =======================================
|
||||
X86_FEATURE_TSC The TSC is available in hardware
|
||||
X86_FEATURE_RDTSCP The RDTSCP instruction is available
|
||||
X86_FEATURE_CONSTANT_TSC The TSC rate is unchanged with P-states
|
||||
X86_FEATURE_NONSTOP_TSC The TSC does not stop in C-states
|
||||
X86_FEATURE_TSC_RELIABLE TSC sync checks are skipped (VMware)
|
||||
========================= =======================================
|
||||
|
||||
4) Virtualization Problems
|
||||
4. Virtualization Problems
|
||||
==========================
|
||||
|
||||
Timekeeping is especially problematic for virtualization because a number of
|
||||
challenges arise. The most obvious problem is that time is now shared between
|
||||
@ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
|
||||
cause similar problems to virtualization makes it a good justification for
|
||||
solving many of these problems on bare metal.
|
||||
|
||||
4.1) Interrupt clocking
|
||||
4.1. Interrupt clocking
|
||||
-----------------------
|
||||
|
||||
One of the most immediate problems that occurs with legacy operating systems
|
||||
is that the system timekeeping routines are often designed to keep track of
|
||||
@ -502,7 +528,8 @@ thus requires interrupt slewing to keep proper time. It does use a low enough
|
||||
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
|
||||
practice.
|
||||
|
||||
4.2) TSC sampling and serialization
|
||||
4.2. TSC sampling and serialization
|
||||
-----------------------------------
|
||||
|
||||
As the highest precision time source available, the cycle counter of the CPU
|
||||
has aroused much interest from developers. As explained above, this timer has
|
||||
@ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of
|
||||
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
|
||||
system.
|
||||
|
||||
4.3) Timespec aliasing
|
||||
4.3. Timespec aliasing
|
||||
----------------------
|
||||
|
||||
Additionally, this lack of serialization from the TSC poses another challenge
|
||||
when using results of the TSC when measured against another time source. As
|
||||
@ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock
|
||||
and any other values derived from TSC computation (such as TSC virtualization
|
||||
itself).
|
||||
|
||||
4.4) Migration
|
||||
4.4. Migration
|
||||
--------------
|
||||
|
||||
Migration of a virtual machine raises problems for timekeeping in two ways.
|
||||
First, the migration itself may take time, during which interrupts cannot be
|
||||
@ -566,7 +595,8 @@ always be caught up to the original rate. KVM clock avoids these problems by
|
||||
simply storing multipliers and offsets against the TSC for the guest to convert
|
||||
back into nanosecond resolution values.
|
||||
|
||||
4.5) Scheduling
|
||||
4.5. Scheduling
|
||||
---------------
|
||||
|
||||
Since scheduling may be based on precise timing and firing of interrupts, the
|
||||
scheduling algorithms of an operating system may be adversely affected by
|
||||
@ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a
|
||||
paravirtualized scheduler clock, which reveals the true amount of CPU time for
|
||||
which a virtual machine has been running.
|
||||
|
||||
4.6) Watchdogs
|
||||
4.6. Watchdogs
|
||||
--------------
|
||||
|
||||
Watchdog timers, such as the lock detector in Linux may fire accidentally when
|
||||
running under hardware virtualization due to timer interrupts being delayed or
|
||||
@ -587,7 +618,8 @@ misinterpretation of the passage of real time. Usually, these warnings are
|
||||
spurious and can be ignored, but in some circumstances it may be necessary to
|
||||
disable such detection.
|
||||
|
||||
4.7) Delays and precision timing
|
||||
4.7. Delays and precision timing
|
||||
--------------------------------
|
||||
|
||||
Precise timing and delays may not be possible in a virtualized system. This
|
||||
can happen if the system is controlling physical hardware, or issues delays to
|
||||
@ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a
|
||||
significant issue. In many cases these delays may be eliminated through
|
||||
configuration or paravirtualization.
|
||||
|
||||
4.8) Covert channels and leaks
|
||||
4.8. Covert channels and leaks
|
||||
------------------------------
|
||||
|
||||
In addition to the above problems, time information will inevitably leak to the
|
||||
guest about the host in anything but a perfect implementation of virtualized
|
File diff suppressed because it is too large
Load Diff
@ -19,7 +19,6 @@ x86-specific Documentation
|
||||
tlb
|
||||
mtrr
|
||||
pat
|
||||
intel_mpx
|
||||
intel-iommu
|
||||
intel_txt
|
||||
amd-memory-encryption
|
||||
|
93
MAINTAINERS
93
MAINTAINERS
@ -2796,11 +2796,11 @@ F: drivers/block/aoe/
|
||||
|
||||
ATHEROS 71XX/9XXX GPIO DRIVER
|
||||
M: Alban Bedel <albeu@free.fr>
|
||||
S: Maintained
|
||||
W: https://github.com/AlbanBedel/linux
|
||||
T: git git://github.com/AlbanBedel/linux
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-ath79.c
|
||||
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
|
||||
F: drivers/gpio/gpio-ath79.c
|
||||
|
||||
ATHEROS 71XX/9XXX USB PHY DRIVER
|
||||
M: Alban Bedel <albeu@free.fr>
|
||||
@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER
|
||||
M: Gregory Fong <gregory.0xf0@gmail.com>
|
||||
L: bcm-kernel-feedback-list@broadcom.com
|
||||
S: Supported
|
||||
F: drivers/gpio/gpio-brcmstb.c
|
||||
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
|
||||
F: drivers/gpio/gpio-brcmstb.c
|
||||
|
||||
BROADCOM BRCMSTB I2C DRIVER
|
||||
M: Kamal Dasu <kdasu.kdev@gmail.com>
|
||||
@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER
|
||||
M: Ray Jui <rjui@broadcom.com>
|
||||
L: bcm-kernel-feedback-list@broadcom.com
|
||||
S: Supported
|
||||
F: drivers/gpio/gpio-bcm-kona.c
|
||||
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
|
||||
F: drivers/gpio/gpio-bcm-kona.c
|
||||
|
||||
BROADCOM NETXTREME-E ROCE DRIVER
|
||||
M: Selvin Xavier <selvin.xavier@broadcom.com>
|
||||
@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c
|
||||
|
||||
BT8XXGPIO DRIVER
|
||||
M: Michael Buesch <m@bues.ch>
|
||||
W: http://bu3sch.de/btgpio.php
|
||||
S: Maintained
|
||||
W: http://bu3sch.de/btgpio.php
|
||||
F: drivers/gpio/gpio-bt8xx.c
|
||||
|
||||
BTRFS FILE SYSTEM
|
||||
@ -3649,6 +3649,7 @@ F: sound/pci/oxygen/
|
||||
|
||||
C-SKY ARCHITECTURE
|
||||
M: Guo Ren <guoren@kernel.org>
|
||||
L: linux-csky@vger.kernel.org
|
||||
T: git https://github.com/c-sky/csky-linux.git
|
||||
S: Supported
|
||||
F: arch/csky/
|
||||
@ -3909,7 +3910,7 @@ S: Supported
|
||||
F: Documentation/filesystems/ceph.txt
|
||||
F: fs/ceph/
|
||||
|
||||
CERTIFICATE HANDLING:
|
||||
CERTIFICATE HANDLING
|
||||
M: David Howells <dhowells@redhat.com>
|
||||
M: David Woodhouse <dwmw2@infradead.org>
|
||||
L: keyrings@vger.kernel.org
|
||||
@ -3919,7 +3920,7 @@ F: certs/
|
||||
F: scripts/sign-file.c
|
||||
F: scripts/extract-cert.c
|
||||
|
||||
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
|
||||
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM
|
||||
L: devel@driverdev.osuosl.org
|
||||
S: Obsolete
|
||||
F: drivers/staging/wusbcore/
|
||||
@ -5932,12 +5933,12 @@ S: Maintained
|
||||
F: drivers/media/dvb-frontends/ec100*
|
||||
|
||||
ECRYPT FILE SYSTEM
|
||||
M: Tyler Hicks <tyhicks@canonical.com>
|
||||
M: Tyler Hicks <code@tyhicks.com>
|
||||
L: ecryptfs@vger.kernel.org
|
||||
W: http://ecryptfs.org
|
||||
W: https://launchpad.net/ecryptfs
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
|
||||
S: Supported
|
||||
S: Odd Fixes
|
||||
F: Documentation/filesystems/ecryptfs.txt
|
||||
F: fs/ecryptfs/
|
||||
|
||||
@ -7047,7 +7048,7 @@ L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/uio/uio_pci_generic.c
|
||||
|
||||
GENERIC VDSO LIBRARY:
|
||||
GENERIC VDSO LIBRARY
|
||||
M: Andy Lutomirski <luto@kernel.org>
|
||||
M: Thomas Gleixner <tglx@linutronix.de>
|
||||
M: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
@ -7143,18 +7144,18 @@ GPIO SUBSYSTEM
|
||||
M: Linus Walleij <linus.walleij@linaro.org>
|
||||
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||
F: Documentation/ABI/obsolete/sysfs-gpio
|
||||
F: Documentation/ABI/testing/gpio-cdev
|
||||
F: Documentation/admin-guide/gpio/
|
||||
F: Documentation/devicetree/bindings/gpio/
|
||||
F: Documentation/driver-api/gpio/
|
||||
F: Documentation/admin-guide/gpio/
|
||||
F: Documentation/ABI/testing/gpio-cdev
|
||||
F: Documentation/ABI/obsolete/sysfs-gpio
|
||||
F: drivers/gpio/
|
||||
F: include/asm-generic/gpio.h
|
||||
F: include/linux/gpio/
|
||||
F: include/linux/gpio.h
|
||||
F: include/linux/of_gpio.h
|
||||
F: include/asm-generic/gpio.h
|
||||
F: include/uapi/linux/gpio.h
|
||||
F: tools/gpio/
|
||||
|
||||
@ -8055,8 +8056,8 @@ F: drivers/scsi/ips.*
|
||||
ICH LPC AND GPIO DRIVER
|
||||
M: Peter Tyser <ptyser@xes-inc.com>
|
||||
S: Maintained
|
||||
F: drivers/mfd/lpc_ich.c
|
||||
F: drivers/gpio/gpio-ich.c
|
||||
F: drivers/mfd/lpc_ich.c
|
||||
|
||||
ICY I2C DRIVER
|
||||
M: Max Staudt <max@enpas.org>
|
||||
@ -8392,7 +8393,7 @@ M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
|
||||
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
|
||||
L: intel-gfx@lists.freedesktop.org
|
||||
W: https://01.org/linuxgraphics/
|
||||
B: https://01.org/linuxgraphics/documentation/how-report-bugs
|
||||
B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
|
||||
C: irc://chat.freenode.net/intel-gfx
|
||||
Q: http://patchwork.freedesktop.org/project/intel-gfx/
|
||||
T: git git://anongit.freedesktop.org/drm-intel
|
||||
@ -9278,7 +9279,7 @@ F: include/keys/trusted-type.h
|
||||
F: security/keys/trusted.c
|
||||
F: include/keys/trusted.h
|
||||
|
||||
KEYS/KEYRINGS:
|
||||
KEYS/KEYRINGS
|
||||
M: David Howells <dhowells@redhat.com>
|
||||
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
|
||||
L: keyrings@vger.kernel.org
|
||||
@ -11114,14 +11115,12 @@ S: Maintained
|
||||
F: drivers/usb/image/microtek.*
|
||||
|
||||
MIPS
|
||||
M: Ralf Baechle <ralf@linux-mips.org>
|
||||
M: Paul Burton <paulburton@kernel.org>
|
||||
M: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
|
||||
L: linux-mips@vger.kernel.org
|
||||
W: http://www.linux-mips.org/
|
||||
T: git git://git.linux-mips.org/pub/scm/ralf/linux.git
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
|
||||
Q: http://patchwork.linux-mips.org/project/linux-mips/list/
|
||||
S: Supported
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/mips/
|
||||
F: Documentation/mips/
|
||||
F: arch/mips/
|
||||
@ -11484,7 +11483,7 @@ F: drivers/scsi/mac_scsi.*
|
||||
F: drivers/scsi/sun3_scsi.*
|
||||
F: drivers/scsi/sun3_scsi_vme.c
|
||||
|
||||
NCSI LIBRARY:
|
||||
NCSI LIBRARY
|
||||
M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
|
||||
S: Maintained
|
||||
F: net/ncsi/
|
||||
@ -12740,7 +12739,7 @@ M: Tom Joseph <tjoseph@cadence.com>
|
||||
L: linux-pci@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/pci/cdns,*.txt
|
||||
F: drivers/pci/controller/pcie-cadence*
|
||||
F: drivers/pci/controller/cadence/
|
||||
|
||||
PCI DRIVER FOR FREESCALE LAYERSCAPE
|
||||
M: Minghuan Lian <minghuan.Lian@nxp.com>
|
||||
@ -13512,7 +13511,7 @@ L: linuxppc-dev@lists.ozlabs.org
|
||||
S: Maintained
|
||||
F: drivers/block/ps3vram.c
|
||||
|
||||
PSAMPLE PACKET SAMPLING SUPPORT:
|
||||
PSAMPLE PACKET SAMPLING SUPPORT
|
||||
M: Yotam Gigi <yotam.gi@gmail.com>
|
||||
S: Maintained
|
||||
F: net/psample
|
||||
@ -14582,10 +14581,10 @@ F: drivers/media/pci/saa7146/
|
||||
F: include/media/drv-intf/saa7146*
|
||||
|
||||
SAFESETID SECURITY MODULE
|
||||
M: Micah Morton <mortonm@chromium.org>
|
||||
S: Supported
|
||||
F: security/safesetid/
|
||||
F: Documentation/admin-guide/LSM/SafeSetID.rst
|
||||
M: Micah Morton <mortonm@chromium.org>
|
||||
S: Supported
|
||||
F: security/safesetid/
|
||||
F: Documentation/admin-guide/LSM/SafeSetID.rst
|
||||
|
||||
SAMSUNG AUDIO (ASoC) DRIVERS
|
||||
M: Krzysztof Kozlowski <krzk@kernel.org>
|
||||
@ -16075,8 +16074,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
|
||||
SYNOPSYS CREG GPIO DRIVER
|
||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-creg-snps.c
|
||||
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
|
||||
F: drivers/gpio/gpio-creg-snps.c
|
||||
|
||||
SYNOPSYS DESIGNWARE 8250 UART DRIVER
|
||||
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
||||
@ -16087,8 +16086,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER
|
||||
M: Hoan Tran <hoan@os.amperecomputing.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-dwapb.c
|
||||
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
|
||||
F: drivers/gpio/gpio-dwapb.c
|
||||
|
||||
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
|
||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||
@ -16552,8 +16551,8 @@ M: Michael Jamet <michael.jamet@intel.com>
|
||||
M: Mika Westerberg <mika.westerberg@linux.intel.com>
|
||||
M: Yehezkel Bernat <YehezkelShB@gmail.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
||||
F: Documentation/admin-guide/thunderbolt.rst
|
||||
F: drivers/thunderbolt/
|
||||
F: include/linux/thunderbolt.h
|
||||
@ -17080,7 +17079,7 @@ S: Maintained
|
||||
F: Documentation/admin-guide/ufs.rst
|
||||
F: fs/ufs/
|
||||
|
||||
UHID USERSPACE HID IO DRIVER:
|
||||
UHID USERSPACE HID IO DRIVER
|
||||
M: David Herrmann <dh.herrmann@googlemail.com>
|
||||
L: linux-input@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -17094,18 +17093,18 @@ S: Maintained
|
||||
F: drivers/usb/common/ulpi.c
|
||||
F: include/linux/ulpi/
|
||||
|
||||
ULTRA-WIDEBAND (UWB) SUBSYSTEM:
|
||||
ULTRA-WIDEBAND (UWB) SUBSYSTEM
|
||||
L: devel@driverdev.osuosl.org
|
||||
S: Obsolete
|
||||
F: drivers/staging/uwb/
|
||||
|
||||
UNICODE SUBSYSTEM:
|
||||
UNICODE SUBSYSTEM
|
||||
M: Gabriel Krisman Bertazi <krisman@collabora.com>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
S: Supported
|
||||
F: fs/unicode/
|
||||
|
||||
UNICORE32 ARCHITECTURE:
|
||||
UNICORE32 ARCHITECTURE
|
||||
M: Guan Xuetao <gxt@pku.edu.cn>
|
||||
W: http://mprc.pku.edu.cn/~guanxuetao/linux
|
||||
S: Maintained
|
||||
@ -17392,11 +17391,14 @@ F: drivers/usb/
|
||||
F: include/linux/usb.h
|
||||
F: include/linux/usb/
|
||||
|
||||
USB TYPEC PI3USB30532 MUX DRIVER
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
USB TYPEC BUS FOR ALTERNATE MODES
|
||||
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/usb/typec/mux/pi3usb30532.c
|
||||
F: Documentation/ABI/testing/sysfs-bus-typec
|
||||
F: Documentation/driver-api/usb/typec_bus.rst
|
||||
F: drivers/usb/typec/altmodes/
|
||||
F: include/linux/usb/typec_altmode.h
|
||||
|
||||
USB TYPEC CLASS
|
||||
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||
@ -17407,14 +17409,11 @@ F: Documentation/driver-api/usb/typec.rst
|
||||
F: drivers/usb/typec/
|
||||
F: include/linux/usb/typec.h
|
||||
|
||||
USB TYPEC BUS FOR ALTERNATE MODES
|
||||
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||
USB TYPEC PI3USB30532 MUX DRIVER
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/ABI/testing/sysfs-bus-typec
|
||||
F: Documentation/driver-api/usb/typec_bus.rst
|
||||
F: drivers/usb/typec/altmodes/
|
||||
F: include/linux/usb/typec_altmode.h
|
||||
F: drivers/usb/typec/mux/pi3usb30532.c
|
||||
|
||||
USB TYPEC PORT CONTROLLER DRIVERS
|
||||
M: Guenter Roeck <linux@roeck-us.net>
|
||||
@ -17791,7 +17790,7 @@ F: include/linux/vbox_utils.h
|
||||
F: include/uapi/linux/vbox*.h
|
||||
F: drivers/virt/vboxguest/
|
||||
|
||||
VIRTUAL BOX SHARED FOLDER VFS DRIVER:
|
||||
VIRTUAL BOX SHARED FOLDER VFS DRIVER
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -18414,8 +18413,8 @@ M: Nandor Han <nandor.han@ge.com>
|
||||
M: Semi Malinen <semi.malinen@ge.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/gpio/gpio-xra1403.c
|
||||
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
|
||||
F: drivers/gpio/gpio-xra1403.c
|
||||
|
||||
XTENSA XTFPGA PLATFORM SUPPORT
|
||||
M: Max Filippov <jcmvbkbc@gmail.com>
|
||||
|
6
Makefile
6
Makefile
@ -2,7 +2,7 @@
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 6
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc1
|
||||
EXTRAVERSION = -rc4
|
||||
NAME = Kleptomaniac Octopus
|
||||
|
||||
# *DOCUMENTATION*
|
||||
@ -68,6 +68,7 @@ unexport GREP_OPTIONS
|
||||
#
|
||||
# If KBUILD_VERBOSE equals 0 then the above command will be hidden.
|
||||
# If KBUILD_VERBOSE equals 1 then the above command is displayed.
|
||||
# If KBUILD_VERBOSE equals 2 then give the reason why each target is rebuilt.
|
||||
#
|
||||
# To put more focus on warnings, be less verbose as default
|
||||
# Use 'make V=1' to see the full commands
|
||||
@ -1238,7 +1239,7 @@ ifneq ($(dtstree),)
|
||||
%.dtb: include/config/kernel.release scripts_dtc
|
||||
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
|
||||
|
||||
PHONY += dtbs dtbs_install dt_binding_check
|
||||
PHONY += dtbs dtbs_install dtbs_check
|
||||
dtbs dtbs_check: include/config/kernel.release scripts_dtc
|
||||
$(Q)$(MAKE) $(build)=$(dtstree)
|
||||
|
||||
@ -1258,6 +1259,7 @@ PHONY += scripts_dtc
|
||||
scripts_dtc: scripts_basic
|
||||
$(Q)$(MAKE) $(build)=scripts/dtc
|
||||
|
||||
PHONY += dt_binding_check
|
||||
dt_binding_check: scripts_dtc
|
||||
$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings
|
||||
|
||||
|
@ -178,9 +178,6 @@
|
||||
phy-mode = "rgmii";
|
||||
pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>;
|
||||
|
||||
snps,phy-bus-name = "stmmac";
|
||||
snps,phy-bus-id = <0>;
|
||||
snps,phy-addr = <0>;
|
||||
snps,reset-gpio = <&pio0 7 0>;
|
||||
snps,reset-active-low;
|
||||
snps,reset-delays-us = <0 10000 1000000>;
|
||||
|
@ -46,7 +46,7 @@
|
||||
/* DAC */
|
||||
format = "i2s";
|
||||
mclk-fs = <256>;
|
||||
frame-inversion = <1>;
|
||||
frame-inversion;
|
||||
cpu {
|
||||
sound-dai = <&sti_uni_player2>;
|
||||
};
|
||||
|
@ -11,8 +11,6 @@ CONFIG_SLAB=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_ARCH_GUMSTIX=y
|
||||
CONFIG_PCCARD=y
|
||||
|
@ -25,7 +25,6 @@ CONFIG_EMBEDDED=y
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
CONFIG_ARCH_AXXIA=y
|
||||
CONFIG_GPIO_PCA953X=y
|
||||
CONFIG_ARM_LPAE=y
|
||||
|
@ -7,7 +7,6 @@ CONFIG_EMBEDDED=y
|
||||
CONFIG_SLOB=y
|
||||
CONFIG_JUMP_LABEL=y
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_CLPS711X=y
|
||||
CONFIG_ARCH_AUTCPU12=y
|
||||
CONFIG_ARCH_CDB89712=y
|
||||
|
@ -17,7 +17,7 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_MODVERSIONS=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
CONFIG_IOSCHED_CFQ=m
|
||||
CONFIG_IOSCHED_BFQ=m
|
||||
CONFIG_ARCH_MULTI_V6=y
|
||||
#CONFIG_ARCH_MULTI_V7 is not set
|
||||
CONFIG_ARCH_CNS3XXX=y
|
||||
|
@ -43,7 +43,6 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
|
||||
CONFIG_USB_MON=y
|
||||
CONFIG_USB_STORAGE=y
|
||||
CONFIG_MMC=y
|
||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
||||
CONFIG_MMC_PXA=y
|
||||
CONFIG_EXT3_FS=y
|
||||
CONFIG_NFS_FS=y
|
||||
|
@ -7,8 +7,6 @@ CONFIG_EXPERT=y
|
||||
# CONFIG_BASE_FULL is not set
|
||||
# CONFIG_EPOLL is not set
|
||||
CONFIG_SLOB=y
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_SA1100=y
|
||||
CONFIG_SA1100_COLLIE=y
|
||||
CONFIG_PCCARD=y
|
||||
|
@ -15,8 +15,6 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_MODVERSIONS=y
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_MULTIPLATFORM=y
|
||||
CONFIG_ARCH_MULTI_V7=n
|
||||
CONFIG_ARCH_MULTI_V5=y
|
||||
|
@ -12,8 +12,6 @@ CONFIG_EMBEDDED=y
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
# CONFIG_SLUB_DEBUG is not set
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
# CONFIG_MMU is not set
|
||||
CONFIG_ARM_SINGLE_ARMV7M=y
|
||||
CONFIG_ARCH_EFM32=y
|
||||
|
@ -11,7 +11,6 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_EP93XX=y
|
||||
CONFIG_CRUNCH=y
|
||||
CONFIG_MACH_ADSSPHERE=y
|
||||
|
@ -9,8 +9,6 @@ CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_ARCH_PXA_ESERIES=y
|
||||
# CONFIG_ARM_THUMB is not set
|
||||
|
@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_MODVERSIONS=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_PXA_EZX=y
|
||||
CONFIG_NO_HZ=y
|
||||
|
@ -5,8 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
CONFIG_MODULES=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_SA1100=y
|
||||
CONFIG_SA1100_H3600=y
|
||||
CONFIG_PCCARD=y
|
||||
|
@ -10,7 +10,6 @@ CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_MACH_H5000=y
|
||||
CONFIG_AEABI=y
|
||||
|
@ -13,7 +13,6 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_MODVERSIONS=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_MACH_INTELMOTE2=y
|
||||
CONFIG_NO_HZ=y
|
||||
|
@ -32,8 +32,6 @@ CONFIG_KPROBES=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
|
@ -1,4 +1,3 @@
|
||||
CONFIG_CROSS_COMPILE="arm-linux-gnueabihf-"
|
||||
CONFIG_HIGH_RES_TIMERS=y
|
||||
CONFIG_PREEMPT=y
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
@ -28,10 +27,7 @@ CONFIG_FLASH_SIZE=0x00080000
|
||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||
CONFIG_ZBOOT_ROM_BSS=0x0
|
||||
CONFIG_ARM_APPENDED_DTB=y
|
||||
# CONFIG_LBDAF is not set
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_BINFMT_FLAT=y
|
||||
CONFIG_BINFMT_ZFLAT=y
|
||||
CONFIG_BINFMT_SHARED_FLAT=y
|
||||
|
@ -9,8 +9,6 @@ CONFIG_SLAB=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_MACH_H4700=y
|
||||
CONFIG_MACH_MAGICIAN=y
|
||||
|
@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y
|
||||
# CONFIG_SLUB_DEBUG is not set
|
||||
# CONFIG_COMPAT_BRK is not set
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
CONFIG_ARCH_MULTI_V4=y
|
||||
# CONFIG_ARCH_MULTI_V7 is not set
|
||||
CONFIG_ARCH_MOXART=y
|
||||
|
@ -25,8 +25,6 @@ CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_MODVERSIONS=y
|
||||
CONFIG_BLK_DEV_INTEGRITY=y
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
|
@ -18,8 +18,6 @@ CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_OMAP=y
|
||||
CONFIG_ARCH_OMAP1=y
|
||||
CONFIG_OMAP_RESET_CLOCKS=y
|
||||
|
@ -7,8 +7,6 @@ CONFIG_SLAB=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_ARCH_PXA_PALM=y
|
||||
# CONFIG_MACH_PALMTX is not set
|
||||
|
@ -13,8 +13,6 @@ CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_MACH_PCM027=y
|
||||
CONFIG_MACH_PCM990_BASEBOARD=y
|
||||
|
@ -6,8 +6,6 @@ CONFIG_EXPERT=y
|
||||
# CONFIG_HOTPLUG is not set
|
||||
# CONFIG_SHMEM is not set
|
||||
CONFIG_MODULES=y
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_SA1100=y
|
||||
CONFIG_SA1100_PLEB=y
|
||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||
|
@ -8,7 +8,6 @@ CONFIG_SLAB=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_MULTI_V6=y
|
||||
CONFIG_ARCH_REALVIEW=y
|
||||
CONFIG_MACH_REALVIEW_EB=y
|
||||
|
@ -14,8 +14,6 @@ CONFIG_MODULE_FORCE_LOAD=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_AT91=y
|
||||
CONFIG_SOC_SAMA5D2=y
|
||||
CONFIG_SOC_SAMA5D3=y
|
||||
@ -182,7 +180,6 @@ CONFIG_USB_GADGET=y
|
||||
CONFIG_USB_ATMEL_USBA=y
|
||||
CONFIG_USB_G_SERIAL=y
|
||||
CONFIG_MMC=y
|
||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
||||
CONFIG_MMC_SDHCI=y
|
||||
CONFIG_MMC_SDHCI_PLTFM=y
|
||||
CONFIG_MMC_SDHCI_OF_AT91=y
|
||||
|
@ -14,8 +14,6 @@ CONFIG_EMBEDDED=y
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
# CONFIG_SLUB_DEBUG is not set
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
# CONFIG_MMU is not set
|
||||
CONFIG_ARCH_STM32=y
|
||||
CONFIG_CPU_V7M_NUM_IRQ=240
|
||||
|
@ -85,6 +85,7 @@ CONFIG_BATTERY_AXP20X=y
|
||||
CONFIG_AXP20X_POWER=y
|
||||
CONFIG_THERMAL=y
|
||||
CONFIG_CPU_THERMAL=y
|
||||
CONFIG_SUN8I_THERMAL=y
|
||||
CONFIG_WATCHDOG=y
|
||||
CONFIG_SUNXI_WATCHDOG=y
|
||||
CONFIG_MFD_AC100=y
|
||||
|
@ -11,7 +11,6 @@ CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
# CONFIG_ARCH_MULTI_V7 is not set
|
||||
CONFIG_ARCH_U300=y
|
||||
CONFIG_MACH_U300_SPIDUMMY=y
|
||||
@ -46,7 +45,6 @@ CONFIG_FB=y
|
||||
CONFIG_BACKLIGHT_CLASS_DEVICE=y
|
||||
# CONFIG_USB_SUPPORT is not set
|
||||
CONFIG_MMC=y
|
||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
||||
CONFIG_MMC_ARMMMCI=y
|
||||
CONFIG_RTC_CLASS=y
|
||||
# CONFIG_RTC_HCTOSYS is not set
|
||||
|
@ -15,8 +15,6 @@ CONFIG_OPROFILE=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_DEADLINE is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_VEXPRESS=y
|
||||
CONFIG_ARCH_VEXPRESS_DCSCB=y
|
||||
CONFIG_ARCH_VEXPRESS_TC2_PM=y
|
||||
|
@ -9,7 +9,6 @@ CONFIG_SLAB=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_ARCH_VIPER=y
|
||||
CONFIG_IWMMXT=y
|
||||
|
@ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=13
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_PXA=y
|
||||
CONFIG_MACH_ARCOM_ZEUS=y
|
||||
CONFIG_PCCARD=m
|
||||
@ -137,7 +136,6 @@ CONFIG_USB_MASS_STORAGE=m
|
||||
CONFIG_USB_G_SERIAL=m
|
||||
CONFIG_USB_G_PRINTER=m
|
||||
CONFIG_MMC=y
|
||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
||||
CONFIG_MMC_PXA=y
|
||||
CONFIG_NEW_LEDS=y
|
||||
CONFIG_LEDS_CLASS=m
|
||||
|
@ -16,7 +16,6 @@ CONFIG_EMBEDDED=y
|
||||
CONFIG_PERF_EVENTS=y
|
||||
CONFIG_SLAB=y
|
||||
# CONFIG_BLK_DEV_BSG is not set
|
||||
# CONFIG_IOSCHED_CFQ is not set
|
||||
CONFIG_ARCH_ZX=y
|
||||
CONFIG_SOC_ZX296702=y
|
||||
# CONFIG_SWP_EMULATE is not set
|
||||
|
@ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
static inline void kvm_arm_vhe_guest_enter(void) {}
|
||||
static inline void kvm_arm_vhe_guest_exit(void) {}
|
||||
|
||||
#define KVM_BP_HARDEN_UNKNOWN -1
|
||||
#define KVM_BP_HARDEN_WA_NEEDED 0
|
||||
#define KVM_BP_HARDEN_NOT_REQUIRED 1
|
||||
|
@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
|
||||
{
|
||||
unsigned long replaced;
|
||||
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
|
||||
old = __opcode_to_mem_thumb32(old);
|
||||
new = __opcode_to_mem_thumb32(new);
|
||||
} else {
|
||||
else
|
||||
old = __opcode_to_mem_arm(old);
|
||||
new = __opcode_to_mem_arm(new);
|
||||
}
|
||||
|
||||
if (validate) {
|
||||
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
|
||||
|
@ -16,10 +16,10 @@ struct patch {
|
||||
unsigned int insn;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
static DEFINE_RAW_SPINLOCK(patch_lock);
|
||||
|
||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
__acquires(&patch_lock)
|
||||
{
|
||||
unsigned int uintaddr = (uintptr_t) addr;
|
||||
bool module = !core_kernel_text(uintaddr);
|
||||
@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
|
||||
if (flags)
|
||||
raw_spin_lock_irqsave(&patch_lock, *flags);
|
||||
else
|
||||
__acquire(&patch_lock);
|
||||
|
||||
set_fixmap(fixmap, page_to_phys(page));
|
||||
|
||||
@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
}
|
||||
|
||||
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
|
||||
__releases(&patch_lock)
|
||||
{
|
||||
clear_fixmap(fixmap);
|
||||
|
||||
if (flags)
|
||||
raw_spin_unlock_irqrestore(&patch_lock, *flags);
|
||||
else
|
||||
__release(&patch_lock);
|
||||
}
|
||||
#else
|
||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
|
||||
#endif
|
||||
|
||||
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
{
|
||||
@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
|
||||
if (remap)
|
||||
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
|
||||
else
|
||||
__acquire(&patch_lock);
|
||||
|
||||
if (thumb2 && __opcode_is_thumb16(insn)) {
|
||||
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
|
||||
@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||
if (waddr != addr) {
|
||||
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
|
||||
patch_unmap(FIX_TEXT_POKE0, &flags);
|
||||
} else
|
||||
__release(&patch_lock);
|
||||
}
|
||||
|
||||
flush_icache_range((uintptr_t)(addr),
|
||||
(uintptr_t)(addr) + size);
|
||||
|
@ -11,7 +11,7 @@ config ARCH_NPCM7XX
|
||||
depends on ARCH_MULTI_V7
|
||||
select PINCTRL_NPCM7XX
|
||||
select NPCM7XX_TIMER
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select GPIOLIB
|
||||
select CACHE_L2X0
|
||||
select ARM_GIC
|
||||
select HAVE_ARM_TWD if SMP
|
||||
|
@ -161,10 +161,10 @@
|
||||
bus-range = <0x0 0x1>;
|
||||
reg = <0x0 0x40000000 0x0 0x10000000>;
|
||||
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
|
||||
interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
|
||||
msi-map = <0x0 &its 0x0 0x10000>;
|
||||
iommu-map = <0x0 &smmu 0x0 0x10000>;
|
||||
|
@ -452,6 +452,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
|
||||
CONFIG_CPU_THERMAL=y
|
||||
CONFIG_THERMAL_EMULATION=y
|
||||
CONFIG_QORIQ_THERMAL=m
|
||||
CONFIG_SUN8I_THERMAL=y
|
||||
CONFIG_ROCKCHIP_THERMAL=m
|
||||
CONFIG_RCAR_THERMAL=y
|
||||
CONFIG_RCAR_GEN3_THERMAL=y
|
||||
@ -547,6 +548,7 @@ CONFIG_ROCKCHIP_DW_MIPI_DSI=y
|
||||
CONFIG_ROCKCHIP_INNO_HDMI=y
|
||||
CONFIG_DRM_RCAR_DU=m
|
||||
CONFIG_DRM_SUN4I=m
|
||||
CONFIG_DRM_SUN6I_DSI=m
|
||||
CONFIG_DRM_SUN8I_DW_HDMI=m
|
||||
CONFIG_DRM_SUN8I_MIXER=m
|
||||
CONFIG_DRM_MSM=m
|
||||
@ -681,7 +683,7 @@ CONFIG_RTC_DRV_SNVS=m
|
||||
CONFIG_RTC_DRV_IMX_SC=m
|
||||
CONFIG_RTC_DRV_XGENE=y
|
||||
CONFIG_DMADEVICES=y
|
||||
CONFIG_DMA_BCM2835=m
|
||||
CONFIG_DMA_BCM2835=y
|
||||
CONFIG_DMA_SUN6I=m
|
||||
CONFIG_FSL_EDMA=y
|
||||
CONFIG_IMX_SDMA=y
|
||||
|
@ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq)
|
||||
isb();
|
||||
}
|
||||
|
||||
static inline void gic_write_dir(u32 irq)
|
||||
static __always_inline void gic_write_dir(u32 irq)
|
||||
{
|
||||
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
|
||||
isb();
|
||||
|
@ -69,7 +69,7 @@ static inline int icache_is_aliasing(void)
|
||||
return test_bit(ICACHEF_ALIASING, &__icache_flags);
|
||||
}
|
||||
|
||||
static inline int icache_is_vpipt(void)
|
||||
static __always_inline int icache_is_vpipt(void)
|
||||
{
|
||||
return test_bit(ICACHEF_VPIPT, &__icache_flags);
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
|
||||
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
|
||||
extern void flush_dcache_page(struct page *);
|
||||
|
||||
static inline void __flush_icache_all(void)
|
||||
static __always_inline void __flush_icache_all(void)
|
||||
{
|
||||
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
|
||||
return;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user