mirror of
https://github.com/qemu/qemu.git
synced 2025-01-22 13:33:25 +08:00
virtio, pci, pc: fixes, features
Bugfixes all over the place. HMAT support. New flags for vhost-user-blk
utility. Auto-tuning of seg max for virtio storage.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-----BEGIN PGP SIGNATURE-----

iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl4TaMEPHG1zdEByZWRo
YXQuY29tAAoJECgfDbjSjVRpvzgH/2LyDAzCa9h93ikSJjmyUk5FUaqve38daEb3
S3JYjwKxQx7u1ydooKhvBQnBCZ2i3S+k62gfYyKB+nBv8xvjs0Eg5D1YJ5E8hciy
lf5OFGWWtX2iPDjZwQwT13kiJe0o3JRGxJJ6XqTEG+1EYOp7cky/FEv4PD030b9m
I2wROZ/Am+onB9YJX8c0Vv1CG+AryuJNXnvwQzTXEjj4U7bEYUyJwVZaCRyAdWQ3
uYXIZN9VwjVX6BFvy9ZAJbEsUVJvOM1/aQaDqcrLz+VlzRT7bRkKHi2G3vakrm1I
r5OpgyLo84132awCncbSykKDH5o8WaxLaJBjGmuBfasMz9wPzAg=
=uL1o
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, pci, pc: fixes, features

Bugfixes all over the place. HMAT support. New flags for vhost-user-blk
utility. Auto-tuning of seg max for virtio storage.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon 06 Jan 2020 17:05:05 GMT
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (32 commits)
  intel_iommu: add present bit check for pasid table entries
  intel_iommu: a fix to vtd_find_as_from_bus_num()
  virtio-net: delete also control queue when TX/RX deleted
  virtio: reset region cache when on queue deletion
  virtio-mmio: update queue size on guest write
  tests: add virtio-scsi and virtio-blk seg_max_adjust test
  virtio: make seg_max virtqueue size dependent
  hw: fix using 4.2 compat in 5.0 machine types for i440fx/q35
  vhost-user-scsi: reset the device if supported
  vhost-user: add VHOST_USER_RESET_DEVICE to reset devices
  hw/pci/pci_host: Let pci_data_[read/write] use unsigned 'size' argument
  hw/pci/pci_host: Remove redundant PCI_DPRINTF()
  virtio-mmio: Clear v2 transport state on soft reset
  ACPI: add expected files for HMAT tests (acpihmat)
  tests/bios-tables-test: add test cases for ACPI HMAT
  tests/numa: Add case for QMP build HMAT
  hmat acpi: Build Memory Side Cache Information Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  numa: Extend CLI to provide memory side cache information
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in: commit 973d306dd6
@@ -576,70 +576,90 @@ vub_new(char *blk_file)
    return vdev_blk;
}

static int opt_fdnum = -1;
static char *opt_socket_path;
static char *opt_blk_file;
static gboolean opt_print_caps;
static gboolean opt_read_only;

static GOptionEntry entries[] = {
    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
      "Print capabilities", NULL },
    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
      "Use inherited fd socket", "FDNUM" },
    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
      "Use UNIX socket path", "PATH" },
    {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
     "block device or file path", "PATH"},
    { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
      "Enable read-only", NULL }
};

int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;
    GError *error = NULL;
    GOptionContext *context;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
            return 0;
    context = g_option_context_new(NULL);
    g_option_context_add_main_entries(context, entries, NULL);
    if (!g_option_context_parse(context, &argc, &argv, &error)) {
        g_printerr("Option parsing failed: %s\n", error->message);
        exit(EXIT_FAILURE);
    }
    if (opt_print_caps) {
        g_print("{\n");
        g_print(" \"type\": \"block\",\n");
        g_print(" \"features\": [\n");
        g_print(" \"read-only\",\n");
        g_print(" \"blk-file\"\n");
        g_print(" ]\n");
        g_print("}\n");
        exit(EXIT_SUCCESS);
    }

    if (!opt_blk_file) {
        g_print("%s\n", g_option_context_get_help(context, true, NULL));
        exit(EXIT_FAILURE);
    }

    if (opt_socket_path) {
        lsock = unix_sock_new(opt_socket_path);
        if (lsock < 0) {
            exit(EXIT_FAILURE);
        }
    } else if (opt_fdnum < 0) {
        g_print("%s\n", g_option_context_get_help(context, true, NULL));
        exit(EXIT_FAILURE);
    } else {
        lsock = opt_fdnum;
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    csock = accept(lsock, (void *)0, (void *)0);
    csock = accept(lsock, NULL, NULL);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
        g_printerr("Accept error %s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    vdev_blk = vub_new(blk_file);
    vdev_blk = vub_new(opt_blk_file);
    if (!vdev_blk) {
        goto err;
        exit(EXIT_FAILURE);
    }
    if (enable_ro) {
    if (opt_read_only) {
        vdev_blk->enable_ro = true;
    }

    if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
                  vub_panic_cb, &vub_iface)) {
        fprintf(stderr, "Failed to initialized libvhost-user-glib\n");
        goto err;
        g_printerr("Failed to initialize libvhost-user-glib\n");
        exit(EXIT_FAILURE);
    }

    g_main_loop_run(vdev_blk->loop);

    g_main_loop_unref(vdev_blk->loop);
    g_option_context_free(context);
    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
@@ -647,8 +667,8 @@ err:
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);
    g_free(opt_socket_path);
    g_free(opt_blk_file);

    return 0;
}

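For illustration, invoking the tool's new capabilities query prints the feature
list that the QAPI schema in the next hunk describes (binary name and path are
assumptions of this sketch, not part of the patch):

    $ vhost-user-blk --print-capabilities
    {
      "type": "block",
      "features": [
        "read-only",
        "blk-file"
      ]
    }
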
@@ -54,6 +54,37 @@
  ]
}

##
# @VHostUserBackendBlockFeature:
#
# List of vhost user "block" features.
#
# @read-only: The --read-only command line option is supported.
# @blk-file: The --blk-file command line option is supported.
#
# Since: 5.0
##
{
  'enum': 'VHostUserBackendBlockFeature',
  'data': [ 'read-only', 'blk-file' ]
}

##
# @VHostUserBackendCapabilitiesBlock:
#
# Capabilities reported by vhost user "block" backends
#
# @features: list of supported features.
#
# Since: 5.0
##
{
  'struct': 'VHostUserBackendCapabilitiesBlock',
  'data': {
    'features': [ 'VHostUserBackendBlockFeature' ]
  }
}

##
# @VHostUserBackendInputFeature:
#
@@ -785,6 +785,7 @@ Protocol features
  #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
  #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
  #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
  #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13

Master message types
--------------------
@@ -1190,6 +1191,20 @@ Master message types
  ancillary data. The GPU protocol is used to inform the master of
  rendering state and updates. See vhost-user-gpu.rst for details.

``VHOST_USER_RESET_DEVICE``
  :id: 34
  :equivalent ioctl: N/A
  :master payload: N/A
  :slave payload: N/A

  Ask the vhost user backend to disable all rings and reset all
  internal device state to the initial state, ready to be
  reinitialized. The backend retains ownership of the device
  throughout the reset operation.

  Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol
  feature is set by the backend.

Slave message types
-------------------

@@ -1376,3 +1391,20 @@ Command line options:
  Enable virgl rendering support.

  (optional)

vhost-user-blk
--------------

Command line options:

--blk-file=PATH

  Specify block device or file path.

  (optional)

--read-only

  Enable read-only.

  (optional)
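A minimal usage sketch for these options; the socket path, image path, and the
QEMU-side device wiring are illustrative assumptions rather than something this
patch mandates:

    # start the backend, exporting a read-only disk image
    vhost-user-blk --socket-path=/tmp/vub.sock --blk-file=/path/to/disk.img --read-only

    # connect a guest to it (vhost-user needs shared guest memory)
    qemu-system-x86_64 -m 1G \
        -object memory-backend-memfd,id=mem0,size=1G,share=on \
        -numa node,memdev=mem0 \
        -chardev socket,id=char0,path=/tmp/vub.sock \
        -device vhost-user-blk-pci,chardev=char0
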
@@ -7,6 +7,7 @@ config ACPI_X86
    select ACPI_NVDIMM
    select ACPI_CPU_HOTPLUG
    select ACPI_MEMORY_HOTPLUG
    select ACPI_HMAT

config ACPI_X86_ICH
    bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
    bool
    depends on ACPI

config ACPI_HMAT
    bool
    depends on ACPI

config ACPI_PCI
    bool
    depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
    depends on PC

config ACPI_HW_REDUCED
    bool
    depends on ACPI

@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o

hw/acpi/hmat.c (new file)
@@ -0,0 +1,268 @@
|
||||
/*
|
||||
* HMAT ACPI Implementation
|
||||
*
|
||||
* Copyright(C) 2019 Intel Corporation.
|
||||
*
|
||||
* Author:
|
||||
* Liu jingqi <jingqi.liu@linux.intel.com>
|
||||
* Tao Xu <tao3.xu@intel.com>
|
||||
*
|
||||
* HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
|
||||
* (HMAT)
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/units.h"
|
||||
#include "sysemu/numa.h"
|
||||
#include "hw/acpi/hmat.h"
|
||||
|
||||
/*
|
||||
* ACPI 6.3:
|
||||
* 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
|
||||
*/
|
||||
static void build_hmat_mpda(GArray *table_data, uint16_t flags,
|
||||
uint32_t initiator, uint32_t mem_node)
|
||||
{
|
||||
|
||||
/* Memory Proximity Domain Attributes Structure */
|
||||
/* Type */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Length */
|
||||
build_append_int_noprefix(table_data, 40, 4);
|
||||
/* Flags */
|
||||
build_append_int_noprefix(table_data, flags, 2);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Proximity Domain for the Attached Initiator */
|
||||
build_append_int_noprefix(table_data, initiator, 4);
|
||||
/* Proximity Domain for the Memory */
|
||||
build_append_int_noprefix(table_data, mem_node, 4);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 4);
|
||||
/*
|
||||
* Reserved:
|
||||
* Previously defined as the Start Address of the System Physical
|
||||
* Address Range. Deprecated since ACPI Spec 6.3.
|
||||
*/
|
||||
build_append_int_noprefix(table_data, 0, 8);
|
||||
/*
|
||||
* Reserved:
|
||||
* Previously defined as the Range Length of the region in bytes.
|
||||
* Deprecated since ACPI Spec 6.3.
|
||||
*/
|
||||
build_append_int_noprefix(table_data, 0, 8);
|
||||
}
|
||||
|
||||
/*
|
||||
* ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
|
||||
* Structure: Table 5-146
|
||||
*/
|
||||
static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
|
||||
uint32_t num_initiator, uint32_t num_target,
|
||||
uint32_t *initiator_list)
|
||||
{
|
||||
int i, index;
|
||||
HMAT_LB_Data *lb_data;
|
||||
uint16_t *entry_list;
|
||||
uint32_t base;
|
||||
/* Length in bytes for entire structure */
|
||||
uint32_t lb_length
|
||||
= 32 /* Table length up to and including Entry Base Unit */
|
||||
+ 4 * num_initiator /* Initiator Proximity Domain List */
|
||||
+ 4 * num_target /* Target Proximity Domain List */
|
||||
+ 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
|
||||
|
||||
/* Type */
|
||||
build_append_int_noprefix(table_data, 1, 2);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Length */
|
||||
build_append_int_noprefix(table_data, lb_length, 4);
|
||||
/* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
|
||||
assert(!(hmat_lb->hierarchy >> 4));
|
||||
build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
|
||||
/* Data Type */
|
||||
build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Number of Initiator Proximity Domains (s) */
|
||||
build_append_int_noprefix(table_data, num_initiator, 4);
|
||||
/* Number of Target Proximity Domains (t) */
|
||||
build_append_int_noprefix(table_data, num_target, 4);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 4);
|
||||
|
||||
/* Entry Base Unit */
|
||||
if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
|
||||
/* Convert latency base from nanoseconds to picoseconds */
|
||||
base = hmat_lb->base * 1000;
|
||||
} else {
|
||||
/* Convert bandwidth base from bytes to megabytes */
|
||||
base = hmat_lb->base / MiB;
|
||||
}
|
||||
build_append_int_noprefix(table_data, base, 8);
|
||||
|
||||
/* Initiator Proximity Domain List */
|
||||
for (i = 0; i < num_initiator; i++) {
|
||||
build_append_int_noprefix(table_data, initiator_list[i], 4);
|
||||
}
|
||||
|
||||
/* Target Proximity Domain List */
|
||||
for (i = 0; i < num_target; i++) {
|
||||
build_append_int_noprefix(table_data, i, 4);
|
||||
}
|
||||
|
||||
/* Latency or Bandwidth Entries */
|
||||
entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t));
|
||||
for (i = 0; i < hmat_lb->list->len; i++) {
|
||||
lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
|
||||
index = lb_data->initiator * num_target + lb_data->target;
|
||||
|
||||
entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_initiator * num_target; i++) {
|
||||
build_append_int_noprefix(table_data, entry_list[i], 2);
|
||||
}
|
||||
|
||||
g_free(entry_list);
|
||||
}
|
||||
|
||||
/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
|
||||
static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
|
||||
NumaHmatCacheOptions *hmat_cache)
|
||||
{
|
||||
/*
|
||||
* Cache Attributes: Bits [3:0] – Total Cache Levels
|
||||
* for this Memory Proximity Domain
|
||||
*/
|
||||
uint32_t cache_attr = total_levels;
|
||||
|
||||
/* Bits [7:4] : Cache Level described in this structure */
|
||||
cache_attr |= (uint32_t) hmat_cache->level << 4;
|
||||
|
||||
/* Bits [11:8] - Cache Associativity */
|
||||
cache_attr |= (uint32_t) hmat_cache->associativity << 8;
|
||||
|
||||
/* Bits [15:12] - Write Policy */
|
||||
cache_attr |= (uint32_t) hmat_cache->policy << 12;
|
||||
|
||||
/* Bits [31:16] - Cache Line size in bytes */
|
||||
cache_attr |= (uint32_t) hmat_cache->line << 16;
|
||||
|
||||
/* Type */
|
||||
build_append_int_noprefix(table_data, 2, 2);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/* Length */
|
||||
build_append_int_noprefix(table_data, 32, 4);
|
||||
/* Proximity Domain for the Memory */
|
||||
build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 4);
|
||||
/* Memory Side Cache Size */
|
||||
build_append_int_noprefix(table_data, hmat_cache->size, 8);
|
||||
/* Cache Attributes */
|
||||
build_append_int_noprefix(table_data, cache_attr, 4);
|
||||
/* Reserved */
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
/*
|
||||
* Number of SMBIOS handles (n)
|
||||
* Linux kernel uses Memory Side Cache Information Structure
|
||||
* without SMBIOS entries for now, so set Number of SMBIOS handles
|
||||
* as 0.
|
||||
*/
|
||||
build_append_int_noprefix(table_data, 0, 2);
|
||||
}
|
||||
|
||||
/* Build HMAT sub table structures */
|
||||
static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
|
||||
{
|
||||
uint16_t flags;
|
||||
uint32_t num_initiator = 0;
|
||||
uint32_t initiator_list[MAX_NODES];
|
||||
int i, hierarchy, type, cache_level, total_levels;
|
||||
HMAT_LB_Info *hmat_lb;
|
||||
NumaHmatCacheOptions *hmat_cache;
|
||||
|
||||
for (i = 0; i < numa_state->num_nodes; i++) {
|
||||
flags = 0;
|
||||
|
||||
if (numa_state->nodes[i].initiator < MAX_NODES) {
|
||||
flags |= HMAT_PROXIMITY_INITIATOR_VALID;
|
||||
}
|
||||
|
||||
build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
|
||||
}
|
||||
|
||||
for (i = 0; i < numa_state->num_nodes; i++) {
|
||||
if (numa_state->nodes[i].has_cpu) {
|
||||
initiator_list[num_initiator++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
|
||||
* Structure: Table 5-146
|
||||
*/
|
||||
for (hierarchy = HMAT_LB_MEM_MEMORY;
|
||||
hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) {
|
||||
for (type = HMAT_LB_DATA_ACCESS_LATENCY;
|
||||
type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
|
||||
hmat_lb = numa_state->hmat_lb[hierarchy][type];
|
||||
|
||||
if (hmat_lb && hmat_lb->list->len) {
|
||||
build_hmat_lb(table_data, hmat_lb, num_initiator,
|
||||
numa_state->num_nodes, initiator_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
|
||||
* Table 5-147
|
||||
*/
|
||||
for (i = 0; i < numa_state->num_nodes; i++) {
|
||||
total_levels = 0;
|
||||
for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
|
||||
if (numa_state->hmat_cache[i][cache_level]) {
|
||||
total_levels++;
|
||||
}
|
||||
}
|
||||
for (cache_level = 0; cache_level <= total_levels; cache_level++) {
|
||||
hmat_cache = numa_state->hmat_cache[i][cache_level];
|
||||
if (hmat_cache) {
|
||||
build_hmat_cache(table_data, total_levels, hmat_cache);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
|
||||
{
|
||||
int hmat_start = table_data->len;
|
||||
|
||||
/* reserve space for HMAT header */
|
||||
acpi_data_push(table_data, 40);
|
||||
|
||||
hmat_build_table_structs(table_data, numa_state);
|
||||
|
||||
build_header(linker, table_data,
|
||||
(void *)(table_data->data + hmat_start),
|
||||
"HMAT", table_data->len - hmat_start, 2, NULL, NULL);
|
||||
}
|
hw/acpi/hmat.h (new file)
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* HMAT ACPI Implementation Header
|
||||
*
|
||||
* Copyright(C) 2019 Intel Corporation.
|
||||
*
|
||||
* Author:
|
||||
* Liu jingqi <jingqi.liu@linux.intel.com>
|
||||
* Tao Xu <tao3.xu@intel.com>
|
||||
*
|
||||
* HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
|
||||
* (HMAT)
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
|
||||
#ifndef HMAT_H
|
||||
#define HMAT_H
|
||||
|
||||
#include "hw/acpi/aml-build.h"
|
||||
|
||||
/*
|
||||
* ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure,
|
||||
* Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in
|
||||
* the Proximity Domain for the Attached Initiator field is valid.
|
||||
* Other bits reserved.
|
||||
*/
|
||||
#define HMAT_PROXIMITY_INITIATOR_VALID 0x1
|
||||
|
||||
void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state);
|
||||
|
||||
#endif
|
@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
|
||||
{
|
||||
VirtIOBlockReq *req;
|
||||
MultiReqBuffer mrb = {};
|
||||
bool suppress_notifications = virtio_queue_get_notification(vq);
|
||||
bool progress = false;
|
||||
|
||||
aio_context_acquire(blk_get_aio_context(s->blk));
|
||||
blk_io_plug(s->blk);
|
||||
|
||||
do {
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
if (suppress_notifications) {
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
}
|
||||
|
||||
while ((req = virtio_blk_get_request(s, vq))) {
|
||||
progress = true;
|
||||
@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
|
||||
}
|
||||
}
|
||||
|
||||
virtio_queue_set_notification(vq, 1);
|
||||
if (suppress_notifications) {
|
||||
virtio_queue_set_notification(vq, 1);
|
||||
}
|
||||
} while (!virtio_queue_empty(vq));
|
||||
|
||||
if (mrb.num_reqs) {
|
||||
@ -908,7 +913,8 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
|
||||
blk_get_geometry(s->blk, &capacity);
|
||||
memset(&blkcfg, 0, sizeof(blkcfg));
|
||||
virtio_stq_p(vdev, &blkcfg.capacity, capacity);
|
||||
virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
|
||||
virtio_stl_p(vdev, &blkcfg.seg_max,
|
||||
s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
|
||||
virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
|
||||
virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
|
||||
virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
|
||||
@ -1133,6 +1139,11 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
|
||||
error_setg(errp, "num-queues property must be larger than 0");
|
||||
return;
|
||||
}
|
||||
if (conf->queue_size <= 2) {
|
||||
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
|
||||
"must be > 2", conf->queue_size);
|
||||
return;
|
||||
}
|
||||
if (!is_power_of_2(conf->queue_size) ||
|
||||
conf->queue_size > VIRTQUEUE_MAX_SIZE) {
|
||||
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
|
||||
@ -1262,6 +1273,7 @@ static Property virtio_blk_properties[] = {
|
||||
true),
|
||||
DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
|
||||
DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
|
||||
DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true),
|
||||
DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
|
||||
IOThread *),
|
||||
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
|
||||
|
@ -1126,9 +1126,17 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
||||
VirtIOSerial *vser = VIRTIO_SERIAL(dev);
|
||||
int i;
|
||||
|
||||
QLIST_REMOVE(vser, next);
|
||||
|
||||
virtio_delete_queue(vser->c_ivq);
|
||||
virtio_delete_queue(vser->c_ovq);
|
||||
for (i = 0; i < vser->bus.max_nr_ports; i++) {
|
||||
virtio_delete_queue(vser->ivqs[i]);
|
||||
virtio_delete_queue(vser->ovqs[i]);
|
||||
}
|
||||
|
||||
g_free(vser->ivqs);
|
||||
g_free(vser->ovqs);
|
||||
g_free(vser->ports_map);
|
||||
|
@ -29,11 +29,15 @@
|
||||
|
||||
GlobalProperty hw_compat_4_2[] = {
|
||||
{ "virtio-blk-device", "x-enable-wce-if-config-wce", "off" },
|
||||
{ "virtio-blk-device", "seg-max-adjust", "off"},
|
||||
{ "virtio-scsi-device", "seg_max_adjust", "off"},
|
||||
{ "vhost-blk-device", "seg_max_adjust", "off"},
|
||||
};
|
||||
const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);
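An illustrative calculation of what the "seg-max-adjust" knob above changes for
virtio storage devices (queue sizes chosen arbitrarily; the minus 2 leaves room
for the request header and status descriptors):

    seg-max-adjust=on,  queue-size=256  ->  seg_max = 256 - 2 = 254
    seg-max-adjust=off  (4.2 compat)    ->  seg_max = 128 - 2 = 126
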
|
||||
|
||||
GlobalProperty hw_compat_4_1[] = {
|
||||
{ "virtio-pci", "x-pcie-flr-init", "off" },
|
||||
{ "virtio-device", "use-disabled-flag", "false" },
|
||||
};
|
||||
const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1);
|
||||
|
||||
@ -429,6 +433,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp)
|
||||
ms->nvdimms_state->is_enabled = value;
|
||||
}
|
||||
|
||||
static bool machine_get_hmat(Object *obj, Error **errp)
|
||||
{
|
||||
MachineState *ms = MACHINE(obj);
|
||||
|
||||
return ms->numa_state->hmat_enabled;
|
||||
}
|
||||
|
||||
static void machine_set_hmat(Object *obj, bool value, Error **errp)
|
||||
{
|
||||
MachineState *ms = MACHINE(obj);
|
||||
|
||||
ms->numa_state->hmat_enabled = value;
|
||||
}
|
||||
|
||||
static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
|
||||
{
|
||||
MachineState *ms = MACHINE(obj);
|
||||
@ -556,6 +574,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
|
||||
const CpuInstanceProperties *props, Error **errp)
|
||||
{
|
||||
MachineClass *mc = MACHINE_GET_CLASS(machine);
|
||||
NodeInfo *numa_info = machine->numa_state->nodes;
|
||||
bool match = false;
|
||||
int i;
|
||||
|
||||
@ -625,6 +644,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
|
||||
match = true;
|
||||
slot->props.node_id = props->node_id;
|
||||
slot->props.has_node_id = props->has_node_id;
|
||||
|
||||
if (machine->numa_state->hmat_enabled) {
|
||||
if ((numa_info[props->node_id].initiator < MAX_NODES) &&
|
||||
(props->node_id != numa_info[props->node_id].initiator)) {
|
||||
error_setg(errp, "The initiator of CPU NUMA node %" PRId64
|
||||
" should be itself", props->node_id);
|
||||
return;
|
||||
}
|
||||
numa_info[props->node_id].has_cpu = true;
|
||||
numa_info[props->node_id].initiator = props->node_id;
|
||||
}
|
||||
}
|
||||
|
||||
if (!match) {
|
||||
@ -845,6 +875,13 @@ static void machine_initfn(Object *obj)
|
||||
|
||||
if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
|
||||
ms->numa_state = g_new0(NumaState, 1);
|
||||
object_property_add_bool(obj, "hmat",
|
||||
machine_get_hmat, machine_set_hmat,
|
||||
&error_abort);
|
||||
object_property_set_description(obj, "hmat",
|
||||
"Set on/off to enable/disable "
|
||||
"ACPI Heterogeneous Memory Attribute "
|
||||
"Table (HMAT)", NULL);
|
||||
}
|
||||
|
||||
/* Register notifier when init is done for sysbus sanity checks */
|
||||
@ -912,6 +949,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
|
||||
return g_string_free(s, false);
|
||||
}
|
||||
|
||||
static void numa_validate_initiator(NumaState *numa_state)
|
||||
{
|
||||
int i;
|
||||
NodeInfo *numa_info = numa_state->nodes;
|
||||
|
||||
for (i = 0; i < numa_state->num_nodes; i++) {
|
||||
if (numa_info[i].initiator == MAX_NODES) {
|
||||
error_report("The initiator of NUMA node %d is missing, use "
|
||||
"'-numa node,initiator' option to declare it", i);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!numa_info[numa_info[i].initiator].present) {
|
||||
error_report("NUMA node %" PRIu16 " is missing, use "
|
||||
"'-numa node' option to declare it first",
|
||||
numa_info[i].initiator);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!numa_info[numa_info[i].initiator].has_cpu) {
|
||||
error_report("The initiator of NUMA node %d is invalid", i);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void machine_numa_finish_cpu_init(MachineState *machine)
|
||||
{
|
||||
int i;
|
||||
@ -952,6 +1015,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
|
||||
machine_set_cpu_numa_node(machine, &props, &error_fatal);
|
||||
}
|
||||
}
|
||||
|
||||
if (machine->numa_state->hmat_enabled) {
|
||||
numa_validate_initiator(machine->numa_state);
|
||||
}
|
||||
|
||||
if (s->len && !qtest_enabled()) {
|
||||
warn_report("CPU(s) not present in any NUMA nodes: %s",
|
||||
s->str);
|
||||
|
hw/core/numa.c
@ -23,6 +23,7 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/units.h"
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "sysemu/numa.h"
|
||||
#include "sysemu/sysemu.h"
|
||||
@ -129,6 +130,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
|
||||
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
|
||||
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the initiator is not set, set it to MAX_NODES. If HMAT is enabled
* and this node has no CPUs, QEMU will raise an error.
|
||||
*/
|
||||
numa_info[nodenr].initiator = MAX_NODES;
|
||||
if (node->has_initiator) {
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
if (node->initiator >= MAX_NODES) {
|
||||
error_report("The initiator id %" PRIu16 " expects an integer "
|
||||
"between 0 and %d", node->initiator,
|
||||
MAX_NODES - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
numa_info[nodenr].initiator = node->initiator;
|
||||
}
|
||||
numa_info[nodenr].present = true;
|
||||
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
|
||||
ms->numa_state->num_nodes++;
|
||||
@ -171,6 +195,253 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
|
||||
ms->numa_state->have_numa_distance = true;
|
||||
}
|
||||
|
||||
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
|
||||
Error **errp)
|
||||
{
|
||||
int i, first_bit, last_bit;
|
||||
uint64_t max_entry, temp_base, bitmap_copy;
|
||||
NodeInfo *numa_info = numa_state->nodes;
|
||||
HMAT_LB_Info *hmat_lb =
|
||||
numa_state->hmat_lb[node->hierarchy][node->data_type];
|
||||
HMAT_LB_Data lb_data = {};
|
||||
HMAT_LB_Data *lb_temp;
|
||||
|
||||
/* Error checking */
|
||||
if (node->initiator > numa_state->num_nodes) {
|
||||
error_setg(errp, "Invalid initiator=%d, it should be less than %d",
|
||||
node->initiator, numa_state->num_nodes);
|
||||
return;
|
||||
}
|
||||
if (node->target > numa_state->num_nodes) {
|
||||
error_setg(errp, "Invalid target=%d, it should be less than %d",
|
||||
node->target, numa_state->num_nodes);
|
||||
return;
|
||||
}
|
||||
if (!numa_info[node->initiator].has_cpu) {
|
||||
error_setg(errp, "Invalid initiator=%d, it isn't an "
|
||||
"initiator proximity domain", node->initiator);
|
||||
return;
|
||||
}
|
||||
if (!numa_info[node->target].present) {
|
||||
error_setg(errp, "The target=%d should point to an existing node",
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!hmat_lb) {
|
||||
hmat_lb = g_malloc0(sizeof(*hmat_lb));
|
||||
numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
|
||||
hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
|
||||
}
|
||||
hmat_lb->hierarchy = node->hierarchy;
|
||||
hmat_lb->data_type = node->data_type;
|
||||
lb_data.initiator = node->initiator;
|
||||
lb_data.target = node->target;
|
||||
|
||||
if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
|
||||
/* Input latency data */
|
||||
|
||||
if (!node->has_latency) {
|
||||
error_setg(errp, "Missing 'latency' option");
|
||||
return;
|
||||
}
|
||||
if (node->has_bandwidth) {
|
||||
error_setg(errp, "Invalid option 'bandwidth' since "
|
||||
"the data type is latency");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Detect duplicate configuration */
|
||||
for (i = 0; i < hmat_lb->list->len; i++) {
|
||||
lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
|
||||
|
||||
if (node->initiator == lb_temp->initiator &&
|
||||
node->target == lb_temp->target) {
|
||||
error_setg(errp, "Duplicate configuration of the latency for "
|
||||
"initiator=%d and target=%d", node->initiator,
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
|
||||
|
||||
if (node->latency) {
|
||||
/* Calculate the temporary base and compressed latency */
|
||||
max_entry = node->latency;
|
||||
temp_base = 1;
|
||||
while (QEMU_IS_ALIGNED(max_entry, 10)) {
|
||||
max_entry /= 10;
|
||||
temp_base *= 10;
|
||||
}
|
||||
|
||||
/* Calculate the max compressed latency */
|
||||
temp_base = MIN(hmat_lb->base, temp_base);
|
||||
max_entry = node->latency / hmat_lb->base;
|
||||
max_entry = MAX(hmat_lb->range_bitmap, max_entry);
|
||||
|
||||
/*
|
||||
* For latency hmat_lb->range_bitmap record the max compressed
|
||||
* latency which should be less than 0xFFFF (UINT16_MAX)
|
||||
*/
|
||||
if (max_entry >= UINT16_MAX) {
|
||||
error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
|
||||
"target=%d should not differ from previously entered "
|
||||
"min or max values on more than %d", node->latency,
|
||||
node->initiator, node->target, UINT16_MAX - 1);
|
||||
return;
|
||||
} else {
|
||||
hmat_lb->base = temp_base;
|
||||
hmat_lb->range_bitmap = max_entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set lb_info_provided bit 0 as 1,
|
||||
* latency information is provided
|
||||
*/
|
||||
numa_info[node->target].lb_info_provided |= BIT(0);
|
||||
}
|
||||
lb_data.data = node->latency;
|
||||
} else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
|
||||
/* Input bandwidth data */
|
||||
if (!node->has_bandwidth) {
|
||||
error_setg(errp, "Missing 'bandwidth' option");
|
||||
return;
|
||||
}
|
||||
if (node->has_latency) {
|
||||
error_setg(errp, "Invalid option 'latency' since "
|
||||
"the data type is bandwidth");
|
||||
return;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
|
||||
error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
|
||||
"target=%d should be 1MB aligned", node->bandwidth,
|
||||
node->initiator, node->target);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Detect duplicate configuration */
|
||||
for (i = 0; i < hmat_lb->list->len; i++) {
|
||||
lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
|
||||
|
||||
if (node->initiator == lb_temp->initiator &&
|
||||
node->target == lb_temp->target) {
|
||||
error_setg(errp, "Duplicate configuration of the bandwidth for "
|
||||
"initiator=%d and target=%d", node->initiator,
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
|
||||
|
||||
if (node->bandwidth) {
|
||||
/* Keep bitmap unchanged when bandwidth out of range */
|
||||
bitmap_copy = hmat_lb->range_bitmap;
|
||||
bitmap_copy |= node->bandwidth;
|
||||
first_bit = ctz64(bitmap_copy);
|
||||
temp_base = UINT64_C(1) << first_bit;
|
||||
max_entry = node->bandwidth / temp_base;
|
||||
last_bit = 64 - clz64(bitmap_copy);
|
||||
|
||||
/*
|
||||
* For bandwidth, first_bit record the base unit of bandwidth bits,
|
||||
* last_bit record the last bit of the max bandwidth. The max
|
||||
* compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
|
||||
*/
|
||||
if ((last_bit - first_bit) > UINT16_BITS ||
|
||||
max_entry >= UINT16_MAX) {
|
||||
error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
|
||||
"and target=%d should not differ from previously "
|
||||
"entered values on more than %d", node->bandwidth,
|
||||
node->initiator, node->target, UINT16_MAX - 1);
|
||||
return;
|
||||
} else {
|
||||
hmat_lb->base = temp_base;
|
||||
hmat_lb->range_bitmap = bitmap_copy;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set lb_info_provided bit 1 as 1,
|
||||
* bandwidth information is provided
|
||||
*/
|
||||
numa_info[node->target].lb_info_provided |= BIT(1);
|
||||
}
|
||||
lb_data.data = node->bandwidth;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
g_array_append_val(hmat_lb->list, lb_data);
|
||||
}
|
||||
|
||||
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
|
||||
Error **errp)
|
||||
{
|
||||
int nb_numa_nodes = ms->numa_state->num_nodes;
|
||||
NodeInfo *numa_info = ms->numa_state->nodes;
|
||||
NumaHmatCacheOptions *hmat_cache = NULL;
|
||||
|
||||
if (node->node_id >= nb_numa_nodes) {
|
||||
error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
|
||||
"than %d", node->node_id, nb_numa_nodes);
|
||||
return;
|
||||
}
|
||||
|
||||
if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
|
||||
error_setg(errp, "The latency and bandwidth information of "
|
||||
"node-id=%" PRIu32 " should be provided before memory side "
|
||||
"cache attributes", node->node_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
|
||||
error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
|
||||
"and less than or equal to %d", node->level,
|
||||
HMAT_LB_LEVELS - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
|
||||
assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
|
||||
if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
|
||||
error_setg(errp, "Duplicate configuration of the side cache for "
|
||||
"node-id=%" PRIu32 " and level=%" PRIu8,
|
||||
node->node_id, node->level);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((node->level > 1) &&
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
|
||||
(node->size >=
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
|
||||
error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
|
||||
" should be less than the size(%" PRIu64 ") of "
|
||||
"level=%u", node->size, node->level,
|
||||
ms->numa_state->hmat_cache[node->node_id]
|
||||
[node->level - 1]->size,
|
||||
node->level - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((node->level < HMAT_LB_LEVELS - 1) &&
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
|
||||
(node->size <=
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
|
||||
error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
|
||||
" should be larger than the size(%" PRIu64 ") of "
|
||||
"level=%u", node->size, node->level,
|
||||
ms->numa_state->hmat_cache[node->node_id]
|
||||
[node->level + 1]->size,
|
||||
node->level + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
hmat_cache = g_malloc0(sizeof(*hmat_cache));
|
||||
memcpy(hmat_cache, node, sizeof(*hmat_cache));
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
|
||||
}
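
A usage sketch of the command-line options these parsers accept; node IDs,
sizes, and latency/bandwidth values are made-up illustrative numbers, and the
option names follow the QAPI schema added by this series:

    qemu-system-x86_64 -machine hmat=on -smp 2 -m 2G \
        -object memory-backend-ram,id=m0,size=1G \
        -object memory-backend-ram,id=m1,size=1G \
        -numa node,nodeid=0,memdev=m0 \
        -numa node,nodeid=1,memdev=m1,initiator=0 \
        -numa cpu,node-id=0,socket-id=0 \
        -numa cpu,node-id=0,socket-id=1 \
        -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
        -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
        -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
        -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
        -numa hmat-cache,node-id=0,size=8K,level=1,associativity=direct,policy=write-back,line=64 \
        -numa hmat-cache,node-id=1,size=8K,level=1,associativity=direct,policy=write-back,line=64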
|
||||
|
||||
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
|
||||
{
|
||||
Error *err = NULL;
|
||||
@ -208,6 +479,32 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
|
||||
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
|
||||
&err);
|
||||
break;
|
||||
case NUMA_OPTIONS_TYPE_HMAT_LB:
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
|
||||
if (err) {
|
||||
goto end;
|
||||
}
|
||||
break;
|
||||
case NUMA_OPTIONS_TYPE_HMAT_CACHE:
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
|
||||
if (err) {
|
||||
goto end;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
@ -68,6 +68,7 @@
|
||||
#include "hw/i386/intel_iommu.h"
|
||||
|
||||
#include "hw/acpi/ipmi.h"
|
||||
#include "hw/acpi/hmat.h"
|
||||
|
||||
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
|
||||
* -M pc-i440fx-2.0. Even if the actual amount of AML generated grows
|
||||
@ -2835,6 +2836,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
acpi_add_table(table_offsets, tables_blob);
|
||||
build_slit(tables_blob, tables->linker, machine);
|
||||
}
|
||||
if (machine->numa_state->hmat_enabled) {
|
||||
acpi_add_table(table_offsets, tables_blob);
|
||||
build_hmat(tables_blob, tables->linker, machine->numa_state);
|
||||
}
|
||||
}
|
||||
if (acpi_get_mcfg(&mcfg)) {
|
||||
acpi_add_table(table_offsets, tables_blob);
|
||||
|
@ -686,9 +686,18 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
|
||||
return true;
|
||||
}
|
||||
|
||||
static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
|
||||
uint32_t pasid,
|
||||
VTDPASIDDirEntry *pdire)
|
||||
static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
|
||||
{
|
||||
return pdire->val & 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* The caller of this function should check the present bit if it wants
* to use the pdir entry for anything beyond the fpd bit check.
|
||||
*/
|
||||
static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
|
||||
uint32_t pasid,
|
||||
VTDPASIDDirEntry *pdire)
|
||||
{
|
||||
uint32_t index;
|
||||
dma_addr_t addr, entry_size;
|
||||
@ -703,18 +712,22 @@ static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vtd_get_pasid_entry(IntelIOMMUState *s,
|
||||
uint32_t pasid,
|
||||
VTDPASIDDirEntry *pdire,
|
||||
VTDPASIDEntry *pe)
|
||||
static inline bool vtd_pe_present(VTDPASIDEntry *pe)
|
||||
{
|
||||
return pe->val[0] & VTD_PASID_ENTRY_P;
|
||||
}
|
||||
|
||||
static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
|
||||
uint32_t pasid,
|
||||
dma_addr_t addr,
|
||||
VTDPASIDEntry *pe)
|
||||
{
|
||||
uint32_t index;
|
||||
dma_addr_t addr, entry_size;
|
||||
dma_addr_t entry_size;
|
||||
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
|
||||
|
||||
index = VTD_PASID_TABLE_INDEX(pasid);
|
||||
entry_size = VTD_PASID_ENTRY_SIZE;
|
||||
addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
|
||||
addr = addr + index * entry_size;
|
||||
if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) {
|
||||
return -VTD_FR_PASID_TABLE_INV;
|
||||
@ -732,25 +745,54 @@ static int vtd_get_pasid_entry(IntelIOMMUState *s,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vtd_get_pasid_entry_from_pasid(IntelIOMMUState *s,
|
||||
dma_addr_t pasid_dir_base,
|
||||
uint32_t pasid,
|
||||
VTDPASIDEntry *pe)
|
||||
/**
|
||||
* The caller of this function should check the present bit if it wants
* to use the pasid entry for anything beyond the fpd bit check.
|
||||
*/
|
||||
static int vtd_get_pe_from_pdire(IntelIOMMUState *s,
|
||||
uint32_t pasid,
|
||||
VTDPASIDDirEntry *pdire,
|
||||
VTDPASIDEntry *pe)
|
||||
{
|
||||
dma_addr_t addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
|
||||
|
||||
return vtd_get_pe_in_pasid_leaf_table(s, pasid, addr, pe);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function gets a pasid entry from a specified pasid
|
||||
* table (includes dir and leaf table) with a specified pasid.
|
||||
* Sanity checks are performed to ensure that a present
* pasid entry is returned to the caller.
|
||||
*/
|
||||
static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
|
||||
dma_addr_t pasid_dir_base,
|
||||
uint32_t pasid,
|
||||
VTDPASIDEntry *pe)
|
||||
{
|
||||
int ret;
|
||||
VTDPASIDDirEntry pdire;
|
||||
|
||||
ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
|
||||
ret = vtd_get_pdire_from_pdir_table(pasid_dir_base,
|
||||
pasid, &pdire);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vtd_get_pasid_entry(s, pasid, &pdire, pe);
|
||||
if (!vtd_pdire_present(&pdire)) {
|
||||
return -VTD_FR_PASID_TABLE_INV;
|
||||
}
|
||||
|
||||
ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (!vtd_pe_present(pe)) {
|
||||
return -VTD_FR_PASID_TABLE_INV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
|
||||
@ -763,7 +805,7 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
|
||||
|
||||
pasid = VTD_CE_GET_RID2PASID(ce);
|
||||
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
|
||||
ret = vtd_get_pasid_entry_from_pasid(s, pasid_dir_base, pasid, pe);
|
||||
ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -781,7 +823,11 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
|
||||
pasid = VTD_CE_GET_RID2PASID(ce);
|
||||
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
|
||||
|
||||
ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
|
||||
/*
|
||||
* No present bit check since fpd is meaningful even
|
||||
* if the present bit is clear.
|
||||
*/
|
||||
ret = vtd_get_pdire_from_pdir_table(pasid_dir_base, pasid, &pdire);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@ -791,7 +837,15 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = vtd_get_pasid_entry(s, pasid, &pdire, &pe);
|
||||
if (!vtd_pdire_present(&pdire)) {
|
||||
return -VTD_FR_PASID_TABLE_INV;
|
||||
}
|
||||
|
||||
/*
|
||||
* No present bit check since fpd is meaningful even
|
||||
* if the present bit is clear.
|
||||
*/
|
||||
ret = vtd_get_pe_from_pdire(s, pasid, &pdire, &pe);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@ -948,6 +1002,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
|
||||
return vtd_bus;
|
||||
}
|
||||
}
|
||||
vtd_bus = NULL;
|
||||
}
|
||||
return vtd_bus;
|
||||
}
|
||||
@ -2610,16 +2665,15 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
|
||||
switch (addr) {
|
||||
/* Root Table Address Register, 64-bit */
|
||||
case DMAR_RTADDR_REG:
|
||||
val = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
|
||||
if (size == 4) {
|
||||
val = s->root & ((1ULL << 32) - 1);
|
||||
} else {
|
||||
val = s->root;
|
||||
val = val & ((1ULL << 32) - 1);
|
||||
}
|
||||
break;
|
||||
|
||||
case DMAR_RTADDR_REG_HI:
|
||||
assert(size == 4);
|
||||
val = s->root >> 32;
|
||||
val = vtd_get_quad_raw(s, DMAR_RTADDR_REG) >> 32;
|
||||
break;
|
||||
|
||||
/* Invalidation Queue Address Register, 64-bit */
|
||||
|
@ -479,6 +479,7 @@ typedef struct VTDRootEntry VTDRootEntry;
|
||||
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
|
||||
|
||||
/* PASID Granular Translation Type Mask */
|
||||
#define VTD_PASID_ENTRY_P 1ULL
|
||||
#define VTD_SM_PASID_ENTRY_PGTT (7ULL << 6)
|
||||
#define VTD_SM_PASID_ENTRY_FLT (1ULL << 6)
|
||||
#define VTD_SM_PASID_ENTRY_SLT (2ULL << 6)
|
||||
|
@ -425,7 +425,6 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m)
|
||||
m->alias = "pc";
|
||||
m->is_default = 1;
|
||||
pcmc->default_cpu_version = 1;
|
||||
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL,
|
||||
|
@ -354,7 +354,6 @@ static void pc_q35_5_0_machine_options(MachineClass *m)
|
||||
pc_q35_machine_options(m);
|
||||
m->alias = "q35";
|
||||
pcmc->default_cpu_version = 1;
|
||||
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
|
||||
}
|
||||
|
||||
DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL,
|
||||
|
@ -280,6 +280,7 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev);
|
||||
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
||||
VirtIOInput *vinput = VIRTIO_INPUT(dev);
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (vic->unrealize) {
|
||||
@ -289,8 +290,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
}
|
||||
virtio_del_queue(vdev, 0);
|
||||
virtio_del_queue(vdev, 1);
|
||||
virtio_delete_queue(vinput->evt);
|
||||
virtio_delete_queue(vinput->sts);
|
||||
virtio_cleanup(vdev);
|
||||
}
|
||||
|
||||
|
@ -3102,7 +3102,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
|
||||
for (i = 0; i < max_queues; i++) {
|
||||
virtio_net_del_queue(n, i);
|
||||
}
|
||||
|
||||
/* delete also control vq */
|
||||
virtio_del_queue(vdev, max_queues * 2);
|
||||
qemu_announce_timer_del(&n->announce_timer, false);
|
||||
g_free(n->vqs);
|
||||
qemu_del_nic(n->nic);
|
||||
|
@ -106,7 +106,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
|
||||
void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len)
|
||||
{
|
||||
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
|
||||
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
|
||||
@ -115,28 +115,21 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
|
||||
return;
|
||||
}
|
||||
|
||||
PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
|
||||
__func__, pci_dev->name, config_addr, val, len);
|
||||
pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
|
||||
val, len);
|
||||
}
|
||||
|
||||
uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
|
||||
uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len)
|
||||
{
|
||||
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
|
||||
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
|
||||
uint32_t val;
|
||||
|
||||
if (!pci_dev) {
|
||||
return ~0x0;
|
||||
}
|
||||
|
||||
val = pci_host_config_read_common(pci_dev, config_addr,
|
||||
PCI_CONFIG_SPACE_SIZE, len);
|
||||
PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
|
||||
__func__, pci_dev->name, config_addr, val, len);
|
||||
|
||||
return val;
|
||||
return pci_host_config_read_common(pci_dev, config_addr,
|
||||
PCI_CONFIG_SPACE_SIZE, len);
|
||||
}
|
||||
|
||||
static void pci_host_config_write(void *opaque, hwaddr addr,
|
||||
@ -167,8 +160,7 @@ static void pci_host_data_write(void *opaque, hwaddr addr,
|
||||
uint64_t val, unsigned len)
|
||||
{
|
||||
PCIHostState *s = opaque;
|
||||
PCI_DPRINTF("write addr " TARGET_FMT_plx " len %d val %x\n",
|
||||
addr, len, (unsigned)val);
|
||||
|
||||
if (s->config_reg & (1u << 31))
|
||||
pci_data_write(s->bus, s->config_reg | (addr & 3), val, len);
|
||||
}
|
||||
@ -177,14 +169,11 @@ static uint64_t pci_host_data_read(void *opaque,
|
||||
hwaddr addr, unsigned len)
|
||||
{
|
||||
PCIHostState *s = opaque;
|
||||
uint32_t val;
|
||||
|
||||
if (!(s->config_reg & (1U << 31))) {
|
||||
return 0xffffffff;
|
||||
}
|
||||
val = pci_data_read(s->bus, s->config_reg | (addr & 3), len);
|
||||
PCI_DPRINTF("read addr " TARGET_FMT_plx " len %d val %x\n",
|
||||
addr, len, val);
|
||||
return val;
|
||||
return pci_data_read(s->bus, s->config_reg | (addr & 3), len);
|
||||
}
|
||||
|
||||
const MemoryRegionOps pci_host_conf_le_ops = {
|
||||
|
@ -275,6 +275,8 @@ static Property vhost_scsi_properties[] = {
|
||||
DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1),
|
||||
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size,
|
||||
128),
|
||||
DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSICommon, conf.seg_max_adjust,
|
||||
true),
|
||||
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
|
||||
0xFFFF),
|
||||
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
|
||||
|
@ -39,6 +39,10 @@ static const int user_feature_bits[] = {
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
enum VhostUserProtocolFeature {
|
||||
VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
|
||||
};
|
||||
|
||||
static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
|
||||
{
|
||||
VHostUserSCSI *s = (VHostUserSCSI *)vdev;
|
||||
@ -62,6 +66,25 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
|
||||
}
|
||||
}
|
||||
|
||||
static void vhost_user_scsi_reset(VirtIODevice *vdev)
|
||||
{
|
||||
VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev);
|
||||
struct vhost_dev *dev = &vsc->dev;
|
||||
|
||||
/*
|
||||
* Historically, reset was not implemented so only reset devices
|
||||
* that are expecting it.
|
||||
*/
|
||||
if (!virtio_has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (dev->vhost_ops->vhost_reset_device) {
|
||||
dev->vhost_ops->vhost_reset_device(dev);
|
||||
}
|
||||
}
|
||||
|
||||
static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
|
||||
{
|
||||
}
|
||||
@ -182,6 +205,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
|
||||
vdc->get_features = vhost_scsi_common_get_features;
|
||||
vdc->set_config = vhost_scsi_common_set_config;
|
||||
vdc->set_status = vhost_user_scsi_set_status;
|
||||
vdc->reset = vhost_user_scsi_reset;
|
||||
fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
|
||||
}
|
||||
|
||||
|
@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSIReq *req, *next;
|
||||
int ret = 0;
|
||||
bool suppress_notifications = virtio_queue_get_notification(vq);
|
||||
bool progress = false;
|
||||
|
||||
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
|
||||
|
||||
do {
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
if (suppress_notifications) {
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
}
|
||||
|
||||
while ((req = virtio_scsi_pop_req(s, vq))) {
|
||||
progress = true;
|
||||
@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
}
|
||||
}
|
||||
|
||||
virtio_queue_set_notification(vq, 1);
|
||||
if (suppress_notifications) {
|
||||
virtio_queue_set_notification(vq, 1);
|
||||
}
|
||||
} while (ret != -EINVAL && !virtio_queue_empty(vq));
|
||||
|
||||
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
|
||||
@ -654,7 +659,8 @@ static void virtio_scsi_get_config(VirtIODevice *vdev,
|
||||
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev);
|
||||
|
||||
virtio_stl_p(vdev, &scsiconf->num_queues, s->conf.num_queues);
|
||||
virtio_stl_p(vdev, &scsiconf->seg_max, 128 - 2);
|
||||
virtio_stl_p(vdev, &scsiconf->seg_max,
|
||||
s->conf.seg_max_adjust ? s->conf.virtqueue_size - 2 : 128 - 2);
|
||||
virtio_stl_p(vdev, &scsiconf->max_sectors, s->conf.max_sectors);
|
||||
virtio_stl_p(vdev, &scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
|
||||
virtio_stl_p(vdev, &scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
|
||||
@ -893,6 +899,11 @@ void virtio_scsi_common_realize(DeviceState *dev,
|
||||
virtio_cleanup(vdev);
|
||||
return;
|
||||
}
|
||||
if (s->conf.virtqueue_size <= 2) {
|
||||
error_setg(errp, "invalid virtqueue_size property (= %" PRIu32 "), "
|
||||
"must be > 2", s->conf.virtqueue_size);
|
||||
return;
|
||||
}
|
||||
s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues);
|
||||
s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
|
||||
s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
|
||||
@ -949,6 +960,8 @@ static Property virtio_scsi_properties[] = {
|
||||
DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1),
|
||||
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSI,
|
||||
parent_obj.conf.virtqueue_size, 128),
|
||||
DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSI,
|
||||
parent_obj.conf.seg_max_adjust, true),
|
||||
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSI, parent_obj.conf.max_sectors,
|
||||
0xFFFF),
|
||||
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSI, parent_obj.conf.cmd_per_lun,
|
||||
|
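The virtio-scsi hunks above tie the advertised seg_max to the virtqueue size when the new seg_max_adjust property is on, instead of the historical hard-coded 126 (128 - 2); the two reserved entries roughly account for the request and response descriptors. A minimal stand-alone sketch of that rule (not part of this commit; the struct, names and values are illustrative):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct example_conf {
    uint32_t virtqueue_size;
    bool seg_max_adjust;
};

/* seg_max leaves two descriptors spare for the request and response. */
static uint32_t example_seg_max(const struct example_conf *conf)
{
    return conf->seg_max_adjust ? conf->virtqueue_size - 2 : 128 - 2;
}

int main(void)
{
    struct example_conf conf = { .virtqueue_size = 256, .seg_max_adjust = true };

    printf("seg_max = %u\n", (unsigned)example_seg_max(&conf)); /* prints 254 */
    return 0;
}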
@ -58,6 +58,7 @@ enum VhostUserProtocolFeature {
|
||||
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
|
||||
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
|
||||
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
|
||||
VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
|
||||
VHOST_USER_PROTOCOL_F_MAX
|
||||
};
|
||||
|
||||
@ -98,6 +99,7 @@ typedef enum VhostUserRequest {
|
||||
VHOST_USER_GET_INFLIGHT_FD = 31,
|
||||
VHOST_USER_SET_INFLIGHT_FD = 32,
|
||||
VHOST_USER_GPU_SET_SOCKET = 33,
|
||||
VHOST_USER_RESET_DEVICE = 34,
|
||||
VHOST_USER_MAX
|
||||
} VhostUserRequest;
|
||||
|
||||
@ -890,10 +892,14 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
|
||||
static int vhost_user_reset_device(struct vhost_dev *dev)
|
||||
{
|
||||
VhostUserMsg msg = {
|
||||
.hdr.request = VHOST_USER_RESET_OWNER,
|
||||
.hdr.flags = VHOST_USER_VERSION,
|
||||
};
|
||||
|
||||
msg.hdr.request = virtio_has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_RESET_DEVICE)
|
||||
? VHOST_USER_RESET_DEVICE
|
||||
: VHOST_USER_RESET_OWNER;
|
||||
|
||||
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -831,6 +831,13 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
|
||||
}
|
||||
balloon_stats_destroy_timer(s);
|
||||
qemu_remove_balloon_handler(s);
|
||||
|
||||
virtio_delete_queue(s->ivq);
|
||||
virtio_delete_queue(s->dvq);
|
||||
virtio_delete_queue(s->svq);
|
||||
if (s->free_page_vq) {
|
||||
virtio_delete_queue(s->free_page_vq);
|
||||
}
|
||||
virtio_cleanup(vdev);
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,19 @@ static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy)
|
||||
virtio_bus_stop_ioeventfd(&proxy->bus);
|
||||
}
|
||||
|
||||
static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (proxy->legacy) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
|
||||
proxy->vqs[i].enabled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
|
||||
{
|
||||
VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
|
||||
@ -295,8 +308,9 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
|
||||
break;
|
||||
case VIRTIO_MMIO_QUEUE_NUM:
|
||||
trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
|
||||
virtio_queue_set_num(vdev, vdev->queue_sel, value);
|
||||
|
||||
if (proxy->legacy) {
|
||||
virtio_queue_set_num(vdev, vdev->queue_sel, value);
|
||||
virtio_queue_update_rings(vdev, vdev->queue_sel);
|
||||
} else {
|
||||
proxy->vqs[vdev->queue_sel].num = value;
|
||||
@ -378,6 +392,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
|
||||
|
||||
if (vdev->status == 0) {
|
||||
virtio_reset(vdev);
|
||||
virtio_mmio_soft_reset(proxy);
|
||||
}
|
||||
break;
|
||||
case VIRTIO_MMIO_QUEUE_DESC_LOW:
|
||||
|
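In the non-legacy (v2) virtio-mmio transport the per-queue "enabled" state lives in the proxy rather than in the core device, so a guest write of 0 to the status register has to clear it explicitly after virtio_reset(); that is what virtio_mmio_soft_reset() above does. A stand-alone sketch of the same idea with illustrative stand-in types (not QEMU's):

#include <stdbool.h>

#define EXAMPLE_VIRTIO_QUEUE_MAX 1024

struct example_mmio_queue {
    bool enabled;
};

struct example_mmio_proxy {
    bool legacy;
    struct example_mmio_queue vqs[EXAMPLE_VIRTIO_QUEUE_MAX];
};

/* Forget transport-side queue state; the legacy transport keeps none. */
static void example_mmio_soft_reset(struct example_mmio_proxy *proxy)
{
    if (proxy->legacy) {
        return;
    }
    for (int i = 0; i < EXAMPLE_VIRTIO_QUEUE_MAX; i++) {
        proxy->vqs[i].enabled = false;
    }
}

int main(void)
{
    static struct example_mmio_proxy proxy;

    proxy.vqs[0].enabled = true;     /* guest enabled queue 0 */
    example_mmio_soft_reset(&proxy); /* status written as 0: clear it */
    return proxy.vqs[0].enabled ? 1 : 0;
}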
@ -608,10 +608,14 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
|
||||
pcie_cap_flr_write_config(pci_dev, address, val, len);
|
||||
}
|
||||
|
||||
if (range_covers_byte(address, len, PCI_COMMAND) &&
|
||||
!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
|
||||
virtio_pci_stop_ioeventfd(proxy);
|
||||
virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
|
||||
if (range_covers_byte(address, len, PCI_COMMAND)) {
|
||||
if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
|
||||
virtio_set_disabled(vdev, true);
|
||||
virtio_pci_stop_ioeventfd(proxy);
|
||||
virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
|
||||
} else {
|
||||
virtio_set_disabled(vdev, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (proxy->config_cap &&
|
||||
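The rewritten PCI_COMMAND handling above distinguishes a guest that merely turned off bus mastering from a genuinely broken device: clearing the bus-master bit now also sets the new "disabled" flag (besides stopping ioeventfd and dropping DRIVER_OK as before), and re-enabling bus mastering clears the flag again. A reduced stand-alone sketch of that decision (illustrative names, not QEMU's API):

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_PCI_COMMAND_MASTER 0x4   /* bus master enable bit */

struct example_vdev {
    bool disabled;   /* temporarily disabled, not broken */
};

static void example_write_command(struct example_vdev *vdev, uint16_t command)
{
    if (!(command & EXAMPLE_PCI_COMMAND_MASTER)) {
        vdev->disabled = true;    /* guest stopped DMA; device still healthy */
    } else {
        vdev->disabled = false;   /* bus mastering restored */
    }
}

int main(void)
{
    struct example_vdev vdev = { .disabled = false };

    example_write_command(&vdev, 0x0);   /* guest clears bus master */
    example_write_command(&vdev, 0x4);   /* and turns it back on */
    return vdev.disabled ? 1 : 0;
}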
@ -1256,6 +1260,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
|
||||
break;
|
||||
case VIRTIO_PCI_COMMON_Q_SIZE:
|
||||
proxy->vqs[vdev->queue_sel].num = val;
|
||||
virtio_queue_set_num(vdev, vdev->queue_sel,
|
||||
proxy->vqs[vdev->queue_sel].num);
|
||||
break;
|
||||
case VIRTIO_PCI_COMMON_Q_MSIX:
|
||||
msix_vector_unuse(&proxy->pci_dev,
|
||||
|
@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
|
||||
}
|
||||
}
|
||||
|
||||
bool virtio_queue_get_notification(VirtQueue *vq)
|
||||
{
|
||||
return vq->notification;
|
||||
}
|
||||
|
||||
void virtio_queue_set_notification(VirtQueue *vq, int enable)
|
||||
{
|
||||
vq->notification = enable;
|
||||
@ -546,7 +551,7 @@ static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
|
||||
* Called within rcu_read_lock(). */
|
||||
static int virtio_queue_empty_rcu(VirtQueue *vq)
|
||||
{
|
||||
if (unlikely(vq->vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -565,7 +570,7 @@ static int virtio_queue_split_empty(VirtQueue *vq)
|
||||
{
|
||||
bool empty;
|
||||
|
||||
if (unlikely(vq->vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -783,7 +788,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
|
||||
|
||||
virtqueue_unmap_sg(vq, elem, len);
|
||||
|
||||
if (unlikely(vq->vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -839,7 +844,7 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
|
||||
|
||||
void virtqueue_flush(VirtQueue *vq, unsigned int count)
|
||||
{
|
||||
if (unlikely(vq->vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
vq->inuse -= count;
|
||||
return;
|
||||
}
|
||||
@ -1602,7 +1607,7 @@ err_undo_map:
|
||||
|
||||
void *virtqueue_pop(VirtQueue *vq, size_t sz)
|
||||
{
|
||||
if (unlikely(vq->vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1698,7 +1703,7 @@ unsigned int virtqueue_drop_all(VirtQueue *vq)
|
||||
{
|
||||
struct VirtIODevice *vdev = vq->vdev;
|
||||
|
||||
if (unlikely(vdev->broken)) {
|
||||
if (virtio_device_disabled(vq->vdev)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1816,7 +1821,7 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
|
||||
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
|
||||
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
||||
|
||||
if (unlikely(vdev->broken)) {
|
||||
if (virtio_device_disabled(vdev)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1920,6 +1925,7 @@ void virtio_reset(void *opaque)
|
||||
vdev->guest_features = 0;
|
||||
vdev->queue_sel = 0;
|
||||
vdev->status = 0;
|
||||
vdev->disabled = false;
|
||||
atomic_set(&vdev->isr, 0);
|
||||
vdev->config_vector = VIRTIO_NO_VECTOR;
|
||||
virtio_notify_vector(vdev, vdev->config_vector);
|
||||
@ -2330,17 +2336,24 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
|
||||
return &vdev->vq[i];
|
||||
}
|
||||
|
||||
void virtio_delete_queue(VirtQueue *vq)
|
||||
{
|
||||
vq->vring.num = 0;
|
||||
vq->vring.num_default = 0;
|
||||
vq->handle_output = NULL;
|
||||
vq->handle_aio_output = NULL;
|
||||
g_free(vq->used_elems);
|
||||
vq->used_elems = NULL;
|
||||
virtio_virtqueue_reset_region_cache(vq);
|
||||
}
|
||||
|
||||
void virtio_del_queue(VirtIODevice *vdev, int n)
|
||||
{
|
||||
if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
|
||||
abort();
|
||||
}
|
||||
|
||||
vdev->vq[n].vring.num = 0;
|
||||
vdev->vq[n].vring.num_default = 0;
|
||||
vdev->vq[n].handle_output = NULL;
|
||||
vdev->vq[n].handle_aio_output = NULL;
|
||||
g_free(vdev->vq[n].used_elems);
|
||||
virtio_delete_queue(&vdev->vq[n]);
|
||||
}
|
||||
|
||||
static void virtio_set_isr(VirtIODevice *vdev, int value)
|
||||
@ -2553,6 +2566,13 @@ static bool virtio_started_needed(void *opaque)
|
||||
return vdev->started;
|
||||
}
|
||||
|
||||
static bool virtio_disabled_needed(void *opaque)
|
||||
{
|
||||
VirtIODevice *vdev = opaque;
|
||||
|
||||
return vdev->disabled;
|
||||
}
|
||||
|
||||
static const VMStateDescription vmstate_virtqueue = {
|
||||
.name = "virtqueue_state",
|
||||
.version_id = 1,
|
||||
@ -2718,6 +2738,17 @@ static const VMStateDescription vmstate_virtio_started = {
|
||||
}
|
||||
};
|
||||
|
||||
static const VMStateDescription vmstate_virtio_disabled = {
|
||||
.name = "virtio/disabled",
|
||||
.version_id = 1,
|
||||
.minimum_version_id = 1,
|
||||
.needed = &virtio_disabled_needed,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_BOOL(disabled, VirtIODevice),
|
||||
VMSTATE_END_OF_LIST()
|
||||
}
|
||||
};
|
||||
|
||||
static const VMStateDescription vmstate_virtio = {
|
||||
.name = "virtio",
|
||||
.version_id = 1,
|
||||
@ -2735,6 +2766,7 @@ static const VMStateDescription vmstate_virtio = {
|
||||
&vmstate_virtio_extra_state,
|
||||
&vmstate_virtio_started,
|
||||
&vmstate_virtio_packed_virtqueues,
|
||||
&vmstate_virtio_disabled,
|
||||
NULL
|
||||
}
|
||||
};
|
||||
@ -3384,17 +3416,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
|
||||
{
|
||||
EventNotifier *n = opaque;
|
||||
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
|
||||
bool progress;
|
||||
|
||||
if (!vq->vring.desc || virtio_queue_empty(vq)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
progress = virtio_queue_notify_aio_vq(vq);
|
||||
|
||||
/* In case the handler function re-enabled notifications */
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
return progress;
|
||||
return virtio_queue_notify_aio_vq(vq);
|
||||
}
|
||||
|
||||
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
|
||||
@ -3569,6 +3596,7 @@ static void virtio_device_instance_finalize(Object *obj)
|
||||
static Property virtio_properties[] = {
|
||||
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
|
||||
DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
|
||||
DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
@ -62,8 +62,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
|
||||
uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
|
||||
uint32_t limit, uint32_t len);
|
||||
|
||||
void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
|
||||
uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
|
||||
void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len);
|
||||
uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len);
|
||||
|
||||
extern const MemoryRegionOps pci_host_conf_le_ops;
|
||||
extern const MemoryRegionOps pci_host_conf_be_ops;
|
||||
|
@ -38,6 +38,7 @@ struct VirtIOBlkConf
|
||||
uint32_t request_merging;
|
||||
uint16_t num_queues;
|
||||
uint16_t queue_size;
|
||||
bool seg_max_adjust;
|
||||
uint32_t max_discard_sectors;
|
||||
uint32_t max_write_zeroes_sectors;
|
||||
bool x_enable_wce_if_config_wce;
|
||||
|
@ -48,6 +48,7 @@ typedef struct virtio_scsi_config VirtIOSCSIConfig;
|
||||
struct VirtIOSCSIConf {
|
||||
uint32_t num_queues;
|
||||
uint32_t virtqueue_size;
|
||||
bool seg_max_adjust;
|
||||
uint32_t max_sectors;
|
||||
uint32_t cmd_per_lun;
|
||||
#ifdef CONFIG_VHOST_SCSI
|
||||
|
@ -100,6 +100,8 @@ struct VirtIODevice
|
||||
uint16_t device_id;
|
||||
bool vm_running;
|
||||
bool broken; /* device in invalid state, needs reset */
|
||||
bool use_disabled_flag; /* allow use of 'disable' flag when needed */
|
||||
bool disabled; /* device in temporarily disabled state */
|
||||
bool use_started;
|
||||
bool started;
|
||||
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
|
||||
@ -183,6 +185,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
|
||||
|
||||
void virtio_del_queue(VirtIODevice *vdev, int n);
|
||||
|
||||
void virtio_delete_queue(VirtQueue *vq);
|
||||
|
||||
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
|
||||
unsigned int len);
|
||||
void virtqueue_flush(VirtQueue *vq, unsigned int count);
|
||||
@ -224,6 +228,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id);
|
||||
|
||||
void virtio_notify_config(VirtIODevice *vdev);
|
||||
|
||||
bool virtio_queue_get_notification(VirtQueue *vq);
|
||||
void virtio_queue_set_notification(VirtQueue *vq, int enable);
|
||||
|
||||
int virtio_queue_ready(VirtQueue *vq);
|
||||
@ -378,4 +383,17 @@ static inline void virtio_set_started(VirtIODevice *vdev, bool started)
|
||||
vdev->started = started;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void virtio_set_disabled(VirtIODevice *vdev, bool disable)
|
||||
{
|
||||
if (vdev->use_disabled_flag) {
|
||||
vdev->disabled = disable;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool virtio_device_disabled(VirtIODevice *vdev)
|
||||
{
|
||||
return unlikely(vdev->disabled || vdev->broken);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
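virtio_device_disabled() above folds the existing "broken" flag and the new "disabled" flag into one predicate, and the virtio.c hunks in this commit use it wherever a bare vdev->broken test used to sit, so virtqueue entry points also bail out while the device is only temporarily disabled. A stand-alone sketch of that pattern with illustrative stand-in types (not QEMU's):

#include <stdbool.h>
#include <stddef.h>

struct example_vdev {
    bool broken;     /* invalid state, needs reset */
    bool disabled;   /* temporarily disabled, e.g. bus mastering off */
};

static bool example_device_disabled(const struct example_vdev *vdev)
{
    return vdev->broken || vdev->disabled;
}

static int dummy_elem;

/* A virtqueue entry point bails out early when the device is unusable. */
static void *example_virtqueue_pop(struct example_vdev *vdev)
{
    if (example_device_disabled(vdev)) {
        return NULL;
    }
    return &dummy_elem;   /* stands in for the normal pop path */
}

int main(void)
{
    struct example_vdev stopped = { .disabled = true };
    struct example_vdev running = { 0 };

    return (example_virtqueue_pop(&stopped) == NULL &&
            example_virtqueue_pop(&running) != NULL) ? 0 : 1;
}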
@ -14,10 +14,35 @@ struct CPUArchId;
|
||||
#define NUMA_DISTANCE_MAX 254
|
||||
#define NUMA_DISTANCE_UNREACHABLE 255
|
||||
|
||||
/* the value of AcpiHmatLBInfo flags */
|
||||
enum {
|
||||
HMAT_LB_MEM_MEMORY = 0,
|
||||
HMAT_LB_MEM_CACHE_1ST_LEVEL = 1,
|
||||
HMAT_LB_MEM_CACHE_2ND_LEVEL = 2,
|
||||
HMAT_LB_MEM_CACHE_3RD_LEVEL = 3,
|
||||
HMAT_LB_LEVELS /* must be the last entry */
|
||||
};
|
||||
|
||||
/* the value of AcpiHmatLBInfo data type */
|
||||
enum {
|
||||
HMAT_LB_DATA_ACCESS_LATENCY = 0,
|
||||
HMAT_LB_DATA_READ_LATENCY = 1,
|
||||
HMAT_LB_DATA_WRITE_LATENCY = 2,
|
||||
HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
|
||||
HMAT_LB_DATA_READ_BANDWIDTH = 4,
|
||||
HMAT_LB_DATA_WRITE_BANDWIDTH = 5,
|
||||
HMAT_LB_TYPES /* must be the last entry */
|
||||
};
|
||||
|
||||
#define UINT16_BITS 16
|
||||
|
||||
struct NodeInfo {
|
||||
uint64_t node_mem;
|
||||
struct HostMemoryBackend *node_memdev;
|
||||
bool present;
|
||||
bool has_cpu;
|
||||
uint8_t lb_info_provided;
|
||||
uint16_t initiator;
|
||||
uint8_t distance[MAX_NODES];
|
||||
};
|
||||
|
||||
@ -26,6 +51,31 @@ struct NumaNodeMem {
|
||||
uint64_t node_plugged_mem;
|
||||
};
|
||||
|
||||
struct HMAT_LB_Data {
|
||||
uint8_t initiator;
|
||||
uint8_t target;
|
||||
uint64_t data;
|
||||
};
|
||||
typedef struct HMAT_LB_Data HMAT_LB_Data;
|
||||
|
||||
struct HMAT_LB_Info {
|
||||
/* Indicates whether this describes memory or a given level of memory side cache. */
|
||||
uint8_t hierarchy;
|
||||
|
||||
/* The type of data: access/read/write latency or bandwidth. */
|
||||
uint8_t data_type;
|
||||
|
||||
/* The range bitmap of bandwidth for calculating common base */
|
||||
uint64_t range_bitmap;
|
||||
|
||||
/* The common base unit for latencies or bandwidths */
|
||||
uint64_t base;
|
||||
|
||||
/* Array to store the latencies or bandwidths */
|
||||
GArray *list;
|
||||
};
|
||||
typedef struct HMAT_LB_Info HMAT_LB_Info;
|
||||
|
||||
struct NumaState {
|
||||
/* Number of NUMA nodes */
|
||||
int num_nodes;
|
||||
@ -33,13 +83,26 @@ struct NumaState {
|
||||
/* Allow setting NUMA distance for different NUMA nodes */
|
||||
bool have_numa_distance;
|
||||
|
||||
/* Detect if HMAT support is enabled. */
|
||||
bool hmat_enabled;
|
||||
|
||||
/* NUMA nodes information */
|
||||
NodeInfo nodes[MAX_NODES];
|
||||
|
||||
/* NUMA nodes HMAT Locality Latency and Bandwidth Information */
|
||||
HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
|
||||
|
||||
/* Memory Side Cache Information Structure */
|
||||
NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
|
||||
};
|
||||
typedef struct NumaState NumaState;
|
||||
|
||||
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
|
||||
void parse_numa_opts(MachineState *ms);
|
||||
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
|
||||
Error **errp);
|
||||
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
|
||||
Error **errp);
|
||||
void numa_complete_configuration(MachineState *ms);
|
||||
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
|
||||
extern QemuOptsList qemu_numa_opts;
|
||||
|
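NumaState above keeps at most one HMAT_LB_Info per (memory hierarchy, data type) pair, indexed as hmat_lb[hierarchy][data_type] with the HMAT_LB_* enum values. A minimal stand-alone sketch of that lookup with illustrative stand-in types (not QEMU's):

#include <stdint.h>
#include <stdio.h>

enum { EXAMPLE_HMAT_LB_LEVELS = 4, EXAMPLE_HMAT_LB_TYPES = 6 };

struct example_hmat_lb_info {
    uint64_t base;   /* common base unit for latencies or bandwidths */
};

int main(void)
{
    /* One optional entry per (hierarchy, data type) pair. */
    static struct example_hmat_lb_info *hmat_lb[EXAMPLE_HMAT_LB_LEVELS][EXAMPLE_HMAT_LB_TYPES];
    static struct example_hmat_lb_info memory_access_latency = { .base = 1 };

    /* hierarchy 0 = memory, data type 0 = access latency */
    hmat_lb[0][0] = &memory_access_latency;

    if (hmat_lb[0][0]) {
        printf("base unit: %llu ns\n", (unsigned long long)hmat_lb[0][0]->base);
    }
    return 0;
}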
@ -426,10 +426,14 @@
|
||||
#
|
||||
# @cpu: property based CPU(s) to node mapping (Since: 2.10)
|
||||
#
|
||||
# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
|
||||
#
|
||||
# @hmat-cache: memory side cache information (Since: 5.0)
|
||||
#
|
||||
# Since: 2.1
|
||||
##
|
||||
{ 'enum': 'NumaOptionsType',
|
||||
'data': [ 'node', 'dist', 'cpu' ] }
|
||||
'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
|
||||
|
||||
##
|
||||
# @NumaOptions:
|
||||
@ -444,7 +448,9 @@
|
||||
'data': {
|
||||
'node': 'NumaNodeOptions',
|
||||
'dist': 'NumaDistOptions',
|
||||
'cpu': 'NumaCpuOptions' }}
|
||||
'cpu': 'NumaCpuOptions',
|
||||
'hmat-lb': 'NumaHmatLBOptions',
|
||||
'hmat-cache': 'NumaHmatCacheOptions' }}
|
||||
|
||||
##
|
||||
# @NumaNodeOptions:
|
||||
@ -463,6 +469,13 @@
|
||||
# @memdev: memory backend object. If specified for one node,
|
||||
# it must be specified for all nodes.
|
||||
#
|
||||
# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145,
|
||||
# points to the nodeid which has the memory controller
|
||||
# responsible for this NUMA node. This field provides
|
||||
# additional information as to the initiator node that
|
||||
# is closest (as in directly attached) to this node, and
|
||||
# therefore has the best performance (since 5.0)
|
||||
#
|
||||
# Since: 2.1
|
||||
##
|
||||
{ 'struct': 'NumaNodeOptions',
|
||||
@ -470,7 +483,8 @@
|
||||
'*nodeid': 'uint16',
|
||||
'*cpus': ['uint16'],
|
||||
'*mem': 'size',
|
||||
'*memdev': 'str' }}
|
||||
'*memdev': 'str',
|
||||
'*initiator': 'uint16' }}
|
||||
|
||||
##
|
||||
# @NumaDistOptions:
|
||||
@ -549,6 +563,166 @@
|
||||
'base': 'CpuInstanceProperties',
|
||||
'data' : {} }
|
||||
|
||||
##
|
||||
# @HmatLBMemoryHierarchy:
|
||||
#
|
||||
# The memory hierarchy in the System Locality Latency and Bandwidth
|
||||
# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
|
||||
#
|
||||
# For more information about @HmatLBMemoryHierarchy, see chapter
|
||||
# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
|
||||
#
|
||||
# @memory: the structure represents the memory performance
|
||||
#
|
||||
# @first-level: first level of memory side cache
|
||||
#
|
||||
# @second-level: second level of memory side cache
|
||||
#
|
||||
# @third-level: third level of memory side cache
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'enum': 'HmatLBMemoryHierarchy',
|
||||
'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
|
||||
|
||||
##
|
||||
# @HmatLBDataType:
|
||||
#
|
||||
# Data type in the System Locality Latency and Bandwidth
|
||||
# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
|
||||
#
|
||||
# For more information about @HmatLBDataType, see chapter
|
||||
# 5.2.27.4: Table 5-146: Field "Data Type" of ACPI 6.3 spec.
|
||||
#
|
||||
# @access-latency: access latency (nanoseconds)
|
||||
#
|
||||
# @read-latency: read latency (nanoseconds)
|
||||
#
|
||||
# @write-latency: write latency (nanoseconds)
|
||||
#
|
||||
# @access-bandwidth: access bandwidth (Bytes per second)
|
||||
#
|
||||
# @read-bandwidth: read bandwidth (Bytes per second)
|
||||
#
|
||||
# @write-bandwidth: write bandwidth (Bytes per second)
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'enum': 'HmatLBDataType',
|
||||
'data': [ 'access-latency', 'read-latency', 'write-latency',
|
||||
'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
|
||||
|
||||
##
|
||||
# @NumaHmatLBOptions:
|
||||
#
|
||||
# Set the system locality latency and bandwidth information
|
||||
# between Initiator and Target proximity Domains.
|
||||
#
|
||||
# For more information about @NumaHmatLBOptions, see chapter
|
||||
# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
|
||||
#
|
||||
# @initiator: the Initiator Proximity Domain.
|
||||
#
|
||||
# @target: the Target Proximity Domain.
|
||||
#
|
||||
# @hierarchy: the Memory Hierarchy. Indicates the performance
|
||||
# of memory or side cache.
|
||||
#
|
||||
# @data-type: presents the type of data: access/read/write
# latency or bandwidth of the target memory, or hit latency
# or bandwidth of the target memory side cache.
|
||||
#
|
||||
# @latency: the value of latency from @initiator to @target
|
||||
# proximity domain, the latency unit is "ns(nanosecond)".
|
||||
#
|
||||
# @bandwidth: the value of bandwidth between @initiator and @target
|
||||
# proximity domain, the bandwidth unit is
|
||||
# "Bytes per second".
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'struct': 'NumaHmatLBOptions',
|
||||
'data': {
|
||||
'initiator': 'uint16',
|
||||
'target': 'uint16',
|
||||
'hierarchy': 'HmatLBMemoryHierarchy',
|
||||
'data-type': 'HmatLBDataType',
|
||||
'*latency': 'uint64',
|
||||
'*bandwidth': 'size' }}
|
||||
|
||||
##
|
||||
# @HmatCacheAssociativity:
|
||||
#
|
||||
# Cache associativity in the Memory Side Cache Information Structure
|
||||
# of HMAT
|
||||
#
|
||||
# For more information about @HmatCacheAssociativity, see chapter
|
||||
# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
|
||||
#
|
||||
# @none: None (no memory side cache in this proximity domain,
|
||||
# or cache associativity unknown)
|
||||
#
|
||||
# @direct: Direct Mapped
|
||||
#
|
||||
# @complex: Complex Cache Indexing (implementation specific)
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'enum': 'HmatCacheAssociativity',
|
||||
'data': [ 'none', 'direct', 'complex' ] }
|
||||
|
||||
##
|
||||
# @HmatCacheWritePolicy:
|
||||
#
|
||||
# Cache write policy in the Memory Side Cache Information Structure
|
||||
# of HMAT
|
||||
#
|
||||
# For more information about @HmatCacheWritePolicy, see chapter
|
||||
# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
|
||||
#
|
||||
# @none: None (no memory side cache in this proximity domain,
|
||||
# or cache write policy unknown)
|
||||
#
|
||||
# @write-back: Write Back (WB)
|
||||
#
|
||||
# @write-through: Write Through (WT)
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'enum': 'HmatCacheWritePolicy',
|
||||
'data': [ 'none', 'write-back', 'write-through' ] }
|
||||
|
||||
##
|
||||
# @NumaHmatCacheOptions:
|
||||
#
|
||||
# Set the memory side cache information for a given memory domain.
|
||||
#
|
||||
# For more information about @NumaHmatCacheOptions, see chapter
|
||||
# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
|
||||
#
|
||||
# @node-id: the memory proximity domain to which the memory belongs.
|
||||
#
|
||||
# @size: the size of memory side cache in bytes.
|
||||
#
|
||||
# @level: the cache level described in this structure.
|
||||
#
|
||||
# @associativity: the cache associativity,
|
||||
# none/direct-mapped/complex(complex cache indexing).
|
||||
#
|
||||
# @policy: the write policy, none/write-back/write-through.
|
||||
#
|
||||
# @line: the cache Line size in bytes.
|
||||
#
|
||||
# Since: 5.0
|
||||
##
|
||||
{ 'struct': 'NumaHmatCacheOptions',
|
||||
'data': {
|
||||
'node-id': 'uint32',
|
||||
'size': 'size',
|
||||
'level': 'uint8',
|
||||
'associativity': 'HmatCacheAssociativity',
|
||||
'policy': 'HmatCacheWritePolicy',
|
||||
'line': 'uint16' }}
|
||||
|
||||
##
|
||||
# @HostMemPolicy:
|
||||
#
|
||||
|
@ -40,7 +40,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
|
||||
" suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
|
||||
" nvdimm=on|off controls NVDIMM support (default=off)\n"
|
||||
" enforce-config-section=on|off enforce configuration section migration (default=off)\n"
|
||||
" memory-encryption=@var{} memory encryption object to use (default=none)\n",
|
||||
" memory-encryption=@var{} memory encryption object to use (default=none)\n"
|
||||
" hmat=on|off controls ACPI HMAT support (default=off)\n",
|
||||
QEMU_ARCH_ALL)
|
||||
STEXI
|
||||
@item -machine [type=]@var{name}[,prop=@var{value}[,...]]
|
||||
@ -94,6 +95,9 @@ NOTE: this parameter is deprecated. Please use @option{-global}
|
||||
@option{migration.send-configuration}=@var{on|off} instead.
|
||||
@item memory-encryption=@var{}
|
||||
Memory encryption object to use. The default is none.
|
||||
@item hmat=on|off
|
||||
Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support.
|
||||
The default is off.
|
||||
@end table
|
||||
ETEXI
|
||||
|
||||
@ -168,19 +172,24 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi
|
||||
ETEXI
|
||||
|
||||
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
|
||||
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
|
||||
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
|
||||
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
|
||||
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
|
||||
"-numa dist,src=source,dst=destination,val=distance\n"
|
||||
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
|
||||
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
|
||||
"-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
|
||||
"-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
|
||||
QEMU_ARCH_ALL)
|
||||
STEXI
|
||||
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
|
||||
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
|
||||
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
|
||||
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
|
||||
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
|
||||
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
|
||||
@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}]
|
||||
@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
|
||||
@findex -numa
|
||||
Define a NUMA node and assign RAM and VCPUs to it.
|
||||
Set the NUMA distance from a source node to a destination node.
|
||||
Set the ACPI Heterogeneous Memory Attributes for the given nodes.
|
||||
|
||||
Legacy VCPU assignment uses @samp{cpus} option where
|
||||
@var{firstcpu} and @var{lastcpu} are CPU indexes. Each
|
||||
@ -222,6 +231,27 @@ split equally between them.
|
||||
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
|
||||
if one node uses @samp{memdev}, all of them have to use it.
|
||||
|
||||
@samp{initiator} is an additional option that points to an @var{initiator}
|
||||
NUMA node that has the best performance (the lowest latency or largest bandwidth)
|
||||
to this NUMA @var{node}. Note that this option can be set only when
|
||||
the machine property 'hmat' is set to 'on'.
|
||||
|
||||
The following example creates a machine with 2 NUMA nodes: node 0 has a CPU and
node 1 has only memory, with node 0 as its initiator. Note that because node 0
has a CPU, its initiator defaults to node 0 itself and must remain node 0.
|
||||
@example
|
||||
-machine hmat=on \
|
||||
-m 2G,slots=2,maxmem=4G \
|
||||
-object memory-backend-ram,size=1G,id=m0 \
|
||||
-object memory-backend-ram,size=1G,id=m1 \
|
||||
-numa node,nodeid=0,memdev=m0 \
|
||||
-numa node,nodeid=1,memdev=m1,initiator=0 \
|
||||
-smp 2,sockets=2,maxcpus=2 \
|
||||
-numa cpu,node-id=0,socket-id=0 \
|
||||
-numa cpu,node-id=0,socket-id=1
|
||||
@end example
|
||||
|
||||
@var{source} and @var{destination} are NUMA node IDs.
|
||||
@var{distance} is the NUMA distance from @var{source} to @var{destination}.
|
||||
The distance from a node to itself is always 10. If any pair of nodes is
|
||||
@ -238,6 +268,59 @@ specified resources, it just assigns existing resources to NUMA
|
||||
nodes. This means that one still has to use the @option{-m},
|
||||
@option{-smp} options to allocate RAM and VCPUs respectively.
|
||||
|
||||
Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information
between initiator and target NUMA nodes in the ACPI Heterogeneous Memory
Attribute Table (HMAT). An initiator NUMA node can generate memory requests,
so it usually has one or more processors; a target NUMA node contains
addressable memory.

In the @samp{hmat-lb} option, @var{node} values are NUMA node IDs. @var{hierarchy}
is the memory hierarchy of the target NUMA node: if @var{hierarchy} is 'memory',
the structure represents the memory performance; if @var{hierarchy} is
'first-level|second-level|third-level', the structure represents the aggregated
performance of the memory side caches for each domain. The @var{type} given to
'data-type' is the type of data represented by this structure instance: if
'hierarchy' is 'memory', 'data-type' is the 'access|read|write' latency or the
'access|read|write' bandwidth of the target memory; if 'hierarchy' is
'first-level|second-level|third-level', 'data-type' is the 'access|read|write'
hit latency or hit bandwidth of the target memory side cache.

@var{lat} is the latency value in nanoseconds. @var{bw} is the bandwidth value;
the accepted value and units are NUM[M|G|T], meaning a bandwidth of NUM bytes
per second (or MB/s, GB/s or TB/s depending on the suffix used). Note that a
latency or bandwidth value of 0 means the corresponding latency or bandwidth
information is not provided.
|
||||
|
||||
In the @samp{hmat-cache} option, @var{node-id} is the NUMA node ID to which the
memory belongs. @var{size} is the size of the memory side cache in bytes.
@var{level} is the cache level described in this structure; note that cache
level 0 should not be used with the @samp{hmat-cache} option. @var{associativity}
is the cache associativity; the possible values are
'none/direct(direct-mapped)/complex(complex cache indexing)'. @var{policy} is
the write policy. @var{line} is the cache line size in bytes.
|
||||
|
||||
For example, the following options describe 2 NUMA nodes. Node 0 has 2 CPUs and
RAM; node 1 has only RAM. The processors in node 0 access memory in node 0 with
an access latency of 5 nanoseconds and an access bandwidth of 200 MB/s, and
access memory in node 1 with an access latency of 10 nanoseconds and an access
bandwidth of 100 MB/s. For the memory side cache information, NUMA nodes 0 and
1 both have one level of memory cache, of size 10 KiB, with write-back policy
and a cache line size of 8 bytes:
|
||||
@example
|
||||
-machine hmat=on \
|
||||
-m 2G \
|
||||
-object memory-backend-ram,size=1G,id=m0 \
|
||||
-object memory-backend-ram,size=1G,id=m1 \
|
||||
-smp 2 \
|
||||
-numa node,nodeid=0,memdev=m0 \
|
||||
-numa node,nodeid=1,memdev=m1,initiator=0 \
|
||||
-numa cpu,node-id=0,socket-id=0 \
|
||||
-numa cpu,node-id=0,socket-id=1 \
|
||||
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
|
||||
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
|
||||
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
|
||||
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
|
||||
-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
|
||||
-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
|
||||
@end example
|
||||
|
||||
ETEXI
|
||||
|
||||
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
|
||||
|
134
tests/acceptance/virtio_seg_max_adjust.py
Executable file
@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Test virtio-scsi and virtio-blk queue settings for all machine types
|
||||
#
|
||||
# Copyright (c) 2019 Virtuozzo International GmbH
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
|
||||
from qemu.machine import QEMUMachine
|
||||
from avocado_qemu import Test
|
||||
|
||||
#list of machine types and virtqueue properties to test
|
||||
VIRTIO_SCSI_PROPS = {'seg_max_adjust': 'seg_max_adjust'}
|
||||
VIRTIO_BLK_PROPS = {'seg_max_adjust': 'seg-max-adjust'}
|
||||
|
||||
DEV_TYPES = {'virtio-scsi-pci': VIRTIO_SCSI_PROPS,
|
||||
'virtio-blk-pci': VIRTIO_BLK_PROPS}
|
||||
|
||||
VM_DEV_PARAMS = {'virtio-scsi-pci': ['-device', 'virtio-scsi-pci,id=scsi0'],
|
||||
'virtio-blk-pci': ['-device',
|
||||
'virtio-blk-pci,id=scsi0,drive=drive0',
|
||||
'-drive',
|
||||
'driver=null-co,id=drive0,if=none']}
|
||||
|
||||
|
||||
class VirtioMaxSegSettingsCheck(Test):
|
||||
@staticmethod
|
||||
def make_pattern(props):
|
||||
pattern_items = ['{0} = \w+'.format(prop) for prop in props]
|
||||
return '|'.join(pattern_items)
|
||||
|
||||
def query_virtqueue(self, vm, dev_type_name):
|
||||
query_ok = False
|
||||
error = None
|
||||
props = None
|
||||
|
||||
output = vm.command('human-monitor-command',
|
||||
command_line = 'info qtree')
|
||||
props_list = DEV_TYPES[dev_type_name].values()
|
||||
pattern = self.make_pattern(props_list)
|
||||
res = re.findall(pattern, output)
|
||||
|
||||
if len(res) != len(props_list):
|
||||
props_list = set(props_list)
|
||||
res = set(res)
|
||||
not_found = props_list.difference(res)
|
||||
not_found = ', '.join(not_found)
|
||||
error = '({0}): The following properties not found: {1}'\
|
||||
.format(dev_type_name, not_found)
|
||||
else:
|
||||
query_ok = True
|
||||
props = dict()
|
||||
for prop in res:
|
||||
p = prop.split(' = ')
|
||||
props[p[0]] = p[1]
|
||||
return query_ok, props, error
|
||||
|
||||
def check_mt(self, mt, dev_type_name):
|
||||
with QEMUMachine(self.qemu_bin) as vm:
|
||||
vm.set_machine(mt["name"])
|
||||
for s in VM_DEV_PARAMS[dev_type_name]:
|
||||
vm.add_args(s)
|
||||
vm.launch()
|
||||
query_ok, props, error = self.query_virtqueue(vm, dev_type_name)
|
||||
|
||||
if not query_ok:
|
||||
self.fail('machine type {0}: {1}'.format(mt['name'], error))
|
||||
|
||||
for prop_name, prop_val in props.items():
|
||||
expected_val = mt[prop_name]
|
||||
self.assertEqual(expected_val, prop_val)
|
||||
|
||||
@staticmethod
|
||||
def seg_max_adjust_enabled(mt):
|
||||
# machine types >= 5.0 should have seg_max_adjust = true
|
||||
# others seg_max_adjust = false
|
||||
mt = mt.split("-")
|
||||
|
||||
# machine types with a single-word name, or a name like pc-x.x
|
||||
if len(mt) <= 2:
|
||||
return False
|
||||
|
||||
# machine types like pc-<chip_name>-x.x[.x]
|
||||
ver = mt[2]
|
||||
ver = ver.split(".")
|
||||
|
||||
# versions >= 5.0 go with seg_max_adjust enabled
|
||||
major = int(ver[0])
|
||||
|
||||
if major >= 5:
|
||||
return True
|
||||
return False
|
||||
|
||||
def test_machine_types(self):
|
||||
# collect all machine types except 'none', 'isapc', 'microvm'
|
||||
with QEMUMachine(self.qemu_bin) as vm:
|
||||
vm.launch()
|
||||
machines = [m['name'] for m in vm.command('query-machines')]
|
||||
vm.shutdown()
|
||||
machines.remove('none')
|
||||
machines.remove('isapc')
|
||||
machines.remove('microvm')
|
||||
|
||||
for dev_type in DEV_TYPES:
|
||||
# create the list of machine types and their parameters.
|
||||
mtypes = list()
|
||||
for m in machines:
|
||||
if self.seg_max_adjust_enabled(m):
|
||||
enabled = 'true'
|
||||
else:
|
||||
enabled = 'false'
|
||||
mtypes.append({'name': m,
|
||||
DEV_TYPES[dev_type]['seg_max_adjust']: enabled})
|
||||
|
||||
# test each machine type for a device type
|
||||
for mt in mtypes:
|
||||
self.check_mt(mt, dev_type)
|
@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
|
||||
|
||||
}
|
||||
|
||||
static void test_acpi_tcg_acpi_hmat(const char *machine)
|
||||
{
|
||||
test_data data;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
data.machine = machine;
|
||||
data.variant = ".acpihmat";
|
||||
test_acpi_one(" -machine hmat=on"
|
||||
" -smp 2,sockets=2"
|
||||
" -m 128M,slots=2,maxmem=1G"
|
||||
" -object memory-backend-ram,size=64M,id=m0"
|
||||
" -object memory-backend-ram,size=64M,id=m1"
|
||||
" -numa node,nodeid=0,memdev=m0"
|
||||
" -numa node,nodeid=1,memdev=m1,initiator=0"
|
||||
" -numa cpu,node-id=0,socket-id=0"
|
||||
" -numa cpu,node-id=0,socket-id=1"
|
||||
" -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
|
||||
"data-type=access-latency,latency=1"
|
||||
" -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
|
||||
"data-type=access-bandwidth,bandwidth=65534M"
|
||||
" -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
|
||||
"data-type=access-latency,latency=65534"
|
||||
" -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
|
||||
"data-type=access-bandwidth,bandwidth=32767M"
|
||||
" -numa hmat-cache,node-id=0,size=10K,level=1,"
|
||||
"associativity=direct,policy=write-back,line=8"
|
||||
" -numa hmat-cache,node-id=1,size=10K,level=1,"
|
||||
"associativity=direct,policy=write-back,line=8",
|
||||
&data);
|
||||
free_test_data(&data);
|
||||
}
|
||||
|
||||
static void test_acpi_q35_tcg_acpi_hmat(void)
|
||||
{
|
||||
test_acpi_tcg_acpi_hmat(MACHINE_Q35);
|
||||
}
|
||||
|
||||
static void test_acpi_piix4_tcg_acpi_hmat(void)
|
||||
{
|
||||
test_acpi_tcg_acpi_hmat(MACHINE_PC);
|
||||
}
|
||||
|
||||
static void test_acpi_virt_tcg(void)
|
||||
{
|
||||
test_data data = {
|
||||
@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
|
||||
qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
|
||||
qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
|
||||
qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
|
||||
qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat);
|
||||
qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat);
|
||||
} else if (strcmp(arch, "aarch64") == 0) {
|
||||
qtest_add_func("acpi/virt", test_acpi_virt_tcg);
|
||||
qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem);
|
||||
|
BIN
tests/data/acpi/pc/APIC.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/pc/DSDT.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/pc/HMAT.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/pc/SRAT.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/q35/APIC.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/q35/DSDT.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/q35/HMAT.acpihmat
Normal file
Binary file not shown.
BIN
tests/data/acpi/q35/SRAT.acpihmat
Normal file
Binary file not shown.
@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
|
||||
qtest_quit(qs);
|
||||
}
|
||||
|
||||
static void pc_hmat_build_cfg(const void *data)
|
||||
{
|
||||
QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
|
||||
"-smp 2,sockets=2 "
|
||||
"-m 128M,slots=2,maxmem=1G "
|
||||
"-object memory-backend-ram,size=64M,id=m0 "
|
||||
"-object memory-backend-ram,size=64M,id=m1 "
|
||||
"-numa node,nodeid=0,memdev=m0 "
|
||||
"-numa node,nodeid=1,memdev=m1,initiator=0 "
|
||||
"-numa cpu,node-id=0,socket-id=0 "
|
||||
"-numa cpu,node-id=0,socket-id=1",
|
||||
data ? (char *)data : "");
|
||||
|
||||
/* Fail: Initiator should be less than the number of nodes */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
|
||||
|
||||
/* Fail: Target should be less than the number of nodes */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
|
||||
|
||||
/* Fail: Initiator should contain cpu */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
|
||||
|
||||
/* Fail: Data-type mismatch */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"write-latency\","
|
||||
" 'bandwidth': 524288000 } }")));
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
|
||||
" 'latency': 5 } }")));
|
||||
|
||||
/* Fail: Bandwidth should be 1MB (1048576) aligned */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
|
||||
" 'bandwidth': 1048575 } }")));
|
||||
|
||||
/* Configuring HMAT bandwidth and latency details */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 1 } }"))); /* 1 ns */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 5 } }"))); /* Fail: Duplicate configuration */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
|
||||
" 'bandwidth': 68717379584 } }"))); /* 65534 MB/s */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 65534 } }"))); /* 65534 ns */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
|
||||
" 'bandwidth': 34358689792 } }"))); /* 32767 MB/s */
|
||||
|
||||
/* Fail: node_id should be less than the number of nodes */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 2, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* Fail: level should be less than HMAT_LB_LEVELS (4) */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 4, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* Fail: associativity option should be 'none', if level is 0 */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 0, 'associativity': \"direct\", 'policy': \"none\","
|
||||
" 'line': 0 } }")));
|
||||
/* Fail: policy option should be 'none', if level is 0 */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 0, 'associativity': \"none\", 'policy': \"write-back\","
|
||||
" 'line': 0 } }")));
|
||||
/* Fail: line option should be 0, if level is 0 */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 0, 'associativity': \"none\", 'policy': \"none\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* Configuring HMAT memory side cache attributes */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }"))); /* Fail: Duplicate configuration */
|
||||
/* Fail: The size of level 2 should be smaller than level 1 */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 2, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
/* Fail: The size of level 0 should be larger than level 1 */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 0, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 1, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* let machine initialization complete and run */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
|
||||
"{ 'execute': 'x-exit-preconfig' }")));
|
||||
qtest_qmp_eventwait(qs, "RESUME");
|
||||
|
||||
qtest_quit(qs);
|
||||
}
|
||||
|
||||
static void pc_hmat_off_cfg(const void *data)
|
||||
{
|
||||
QTestState *qs = qtest_initf("%s -nodefaults --preconfig "
|
||||
"-smp 2,sockets=2 "
|
||||
"-m 128M,slots=2,maxmem=1G "
|
||||
"-object memory-backend-ram,size=64M,id=m0 "
|
||||
"-object memory-backend-ram,size=64M,id=m1 "
|
||||
"-numa node,nodeid=0,memdev=m0",
|
||||
data ? (char *)data : "");
|
||||
|
||||
/*
|
||||
* Fail: Enable HMAT with -machine hmat=on
|
||||
* before using any of hmat specific options
|
||||
*/
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\","
|
||||
" 'initiator': 0 } }")));
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\" } }")));
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 1 } }")));
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* let machine initialization complete and run */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
|
||||
"{ 'execute': 'x-exit-preconfig' }")));
|
||||
qtest_qmp_eventwait(qs, "RESUME");
|
||||
|
||||
qtest_quit(qs);
|
||||
}
|
||||
|
||||
static void pc_hmat_erange_cfg(const void *data)
|
||||
{
|
||||
QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
|
||||
"-smp 2,sockets=2 "
|
||||
"-m 128M,slots=2,maxmem=1G "
|
||||
"-object memory-backend-ram,size=64M,id=m0 "
|
||||
"-object memory-backend-ram,size=64M,id=m1 "
|
||||
"-numa node,nodeid=0,memdev=m0 "
|
||||
"-numa node,nodeid=1,memdev=m1,initiator=0 "
|
||||
"-numa cpu,node-id=0,socket-id=0 "
|
||||
"-numa cpu,node-id=0,socket-id=1",
|
||||
data ? (char *)data : "");
|
||||
|
||||
/* Can't store the compressed latency */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 1 } }"))); /* 1 ns */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
|
||||
" 'latency': 65535 } }"))); /* 65535 ns */
|
||||
|
||||
/* Test the 0 input (bandwidth not provided) */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
|
||||
" 'bandwidth': 0 } }"))); /* 0 MB/s */
|
||||
/* Fail: bandwidth should be provided before memory side cache attributes */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
|
||||
" 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
|
||||
" 'line': 8 } }")));
|
||||
|
||||
/* Can't store the compressed bandwidth */
|
||||
g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
|
||||
" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
|
||||
" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
|
||||
" 'bandwidth': 68718428160 } }"))); /* 65535 MB/s */
|
||||
|
||||
/* let machine initialization complete and run */
|
||||
g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
|
||||
"{ 'execute': 'x-exit-preconfig' }")));
|
||||
qtest_qmp_eventwait(qs, "RESUME");
|
||||
|
||||
qtest_quit(qs);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
const char *args = NULL;
|
||||
@ -346,6 +556,9 @@ int main(int argc, char **argv)
|
||||
if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) {
|
||||
qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu);
|
||||
qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg);
|
||||
qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg);
|
||||
qtest_add_data_func("/numa/pc/hmat/off", args, pc_hmat_off_cfg);
|
||||
qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg);
|
||||
}
|
||||
|
||||
if (!strcmp(arch, "ppc64")) {
|
||||
|