summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2022-07-26 12:57:20 -0700
committerRichard Henderson <richard.henderson@linaro.org>2022-07-26 12:57:20 -0700
commite5b6555fb8e8a91dd1d612e2e2d66bf5f43ad1dd (patch)
tree5328762ce117fbe6dbb2ac2a3b593c229f404326
parentf6cce6bcb2ef959cdd4da0e368f7c72045f21d6d (diff)
parent0522be9a0c0094088ccef7aab352c57f483ca250 (diff)
Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
pc,virtio: fixes Several fixes. From now on, regression fixes only. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmLgQr8PHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpGUUIAKtNhrnKopGm4LlRpx8zN3Jc1Jo0nb648gaM # Oyi+Pl8+hpESUhaWN10XDk38/QuPQfIFeR2ZhfYjFTRlZE+n3X9LVlwL8ejjP8KH # AcWm78Ff/SLA45aMKMmw74pvEDNsoPYTp7TrfeIej5ub8BIXr8+8pqDdIR9WwtWO # PbhLNXkTT2yLEs6jCVT4/dyh7zivSkrY7G/RVmtUaFe3PgY8fdW2z3+Txz7UIMgw # CQoGuAucCO5ToBbs2CbT0V5yxY6G5VO6Qd8g0PzDW4M6GsY/Xr5QCnyJe0jTW0d6 # Dcc7UZFAzGNzyQCxHCic9xwTO+ZcJPJlH5TwknunxOb9xwCx4Qs= # =zN41 # -----END PGP SIGNATURE----- # gpg: Signature made Tue 26 Jul 2022 12:38:39 PM PDT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu: hw/virtio/virtio-iommu: Enforce power-of-two notify for both MAP and UNMAP i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type i386/pc: relocate 4g start to 1T where applicable i386/pc: bounds check phys-bits against max used GPA i386/pc: factor out device_memory base/size to helper i386/pc: handle unitialized mr in pc_get_cxl_range_end() i386/pc: factor out cxl range start to helper i386/pc: factor out cxl range end to helper i386/pc: factor out above-4g end to an helper i386/pc: pass pci_hole64_size to pc_memory_init() i386/pc: create pci-host qdev prior to pc_memory_init() hw/i386: add 4g boundary start to X86MachineState hw/cxl: Fix size of constant in interleave granularity function. hw/i386/pc: Always place CXL Memory Regions after device_memory hw/machine: Clear out left over CXL related pointer from move of state handling to machines. acpi/nvdimm: Define trace events for NVDIMM and substitute nvdimm_debug() Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--hw/acpi/nvdimm.c35
-rw-r--r--hw/acpi/trace-events13
-rw-r--r--hw/i386/acpi-build.c2
-rw-r--r--hw/i386/pc.c209
-rw-r--r--hw/i386/pc_piix.c15
-rw-r--r--hw/i386/pc_q35.c15
-rw-r--r--hw/i386/sgx.c2
-rw-r--r--hw/i386/x86.c1
-rw-r--r--hw/pci-host/i440fx.c5
-rw-r--r--hw/virtio/virtio-iommu.c47
-rw-r--r--include/hw/boards.h1
-rw-r--r--include/hw/cxl/cxl_component.h2
-rw-r--r--include/hw/i386/pc.h4
-rw-r--r--include/hw/i386/x86.h3
-rw-r--r--include/hw/mem/nvdimm.h8
-rw-r--r--include/hw/pci-host/i440fx.h3
16 files changed, 258 insertions, 107 deletions
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 5f85b16327..31e46df0bd 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -35,6 +35,7 @@
#include "hw/nvram/fw_cfg.h"
#include "hw/mem/nvdimm.h"
#include "qemu/nvdimm-utils.h"
+#include "trace.h"
/*
* define Byte Addressable Persistent Memory (PM) Region according to
@@ -550,8 +551,8 @@ static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,
fit = fit_buf->fit;
- nvdimm_debug("Read FIT: offset 0x%x FIT size 0x%x Dirty %s.\n",
- read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No");
+ trace_acpi_nvdimm_read_fit(read_fit->offset, fit->len,
+ fit_buf->dirty ? "Yes" : "No");
if (read_fit->offset > fit->len) {
func_ret_status = NVDIMM_DSM_RET_STATUS_INVALID;
@@ -658,7 +659,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
label_size = nvdimm->label_size;
mxfer = nvdimm_get_max_xfer_label_size();
- nvdimm_debug("label_size 0x%x, max_xfer 0x%x.\n", label_size, mxfer);
+ trace_acpi_nvdimm_label_info(label_size, mxfer);
label_size_out.func_ret_status = cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS);
label_size_out.label_size = cpu_to_le32(label_size);
@@ -674,20 +675,18 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
if (offset + length < offset) {
- nvdimm_debug("offset 0x%x + length 0x%x is overflow.\n", offset,
- length);
+ trace_acpi_nvdimm_label_overflow(offset, length);
return ret;
}
if (nvdimm->label_size < offset + length) {
- nvdimm_debug("position 0x%x is beyond label data (len = %" PRIx64 ").\n",
- offset + length, nvdimm->label_size);
+ trace_acpi_nvdimm_label_oversize(offset + length, nvdimm->label_size);
return ret;
}
if (length > nvdimm_get_max_xfer_label_size()) {
- nvdimm_debug("length (0x%x) is larger than max_xfer (0x%x).\n",
- length, nvdimm_get_max_xfer_label_size());
+ trace_acpi_nvdimm_label_xfer_exceed(length,
+ nvdimm_get_max_xfer_label_size());
return ret;
}
@@ -710,8 +709,8 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
get_label_data->offset = le32_to_cpu(get_label_data->offset);
get_label_data->length = le32_to_cpu(get_label_data->length);
- nvdimm_debug("Read Label Data: offset 0x%x length 0x%x.\n",
- get_label_data->offset, get_label_data->length);
+ trace_acpi_nvdimm_read_label(get_label_data->offset,
+ get_label_data->length);
status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
get_label_data->length);
@@ -749,8 +748,8 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
set_label_data->offset = le32_to_cpu(set_label_data->offset);
set_label_data->length = le32_to_cpu(set_label_data->length);
- nvdimm_debug("Write Label Data: offset 0x%x length 0x%x.\n",
- set_label_data->offset, set_label_data->length);
+ trace_acpi_nvdimm_write_label(set_label_data->offset,
+ set_label_data->length);
status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
set_label_data->length);
@@ -821,7 +820,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
static uint64_t
nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
{
- nvdimm_debug("BUG: we never read _DSM IO Port.\n");
+ trace_acpi_nvdimm_read_io_port();
return 0;
}
@@ -832,7 +831,7 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
NvdimmDsmIn *in;
hwaddr dsm_mem_addr = val;
- nvdimm_debug("dsm memory address 0x%" HWADDR_PRIx ".\n", dsm_mem_addr);
+ trace_acpi_nvdimm_dsm_mem_addr(dsm_mem_addr);
/*
* The DSM memory is mapped to guest address space so an evil guest
@@ -846,12 +845,10 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
in->function = le32_to_cpu(in->function);
in->handle = le32_to_cpu(in->handle);
- nvdimm_debug("Revision 0x%x Handler 0x%x Function 0x%x.\n", in->revision,
- in->handle, in->function);
+ trace_acpi_nvdimm_dsm_info(in->revision, in->handle, in->function);
if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) {
- nvdimm_debug("Revision 0x%x is not supported, expect 0x%x.\n",
- in->revision, 0x1);
+ trace_acpi_nvdimm_invalid_revision(in->revision);
nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
goto exit;
}
diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
index 2250126a22..eb60b04f9b 100644
--- a/hw/acpi/trace-events
+++ b/hw/acpi/trace-events
@@ -70,3 +70,16 @@ acpi_erst_reset_out(unsigned record_count) "record_count %u"
acpi_erst_post_load(void *header, unsigned slot_size) "header: 0x%p slot_size %u"
acpi_erst_class_init_in(void)
acpi_erst_class_init_out(void)
+
+# nvdimm.c
+acpi_nvdimm_read_fit(uint32_t offset, uint32_t len, const char *dirty) "Read FIT: offset 0x%" PRIx32 " FIT size 0x%" PRIx32 " Dirty %s"
+acpi_nvdimm_label_info(uint32_t label_size, uint32_t mxfer) "label_size 0x%" PRIx32 ", max_xfer 0x%" PRIx32
+acpi_nvdimm_label_overflow(uint32_t offset, uint32_t length) "offset 0x%" PRIx32 " + length 0x%" PRIx32 " is overflow"
+acpi_nvdimm_label_oversize(uint32_t pos, uint64_t size) "position 0x%" PRIx32 " is beyond label data (len = %" PRIu64 ")"
+acpi_nvdimm_label_xfer_exceed(uint32_t length, uint32_t max_xfer) "length (0x%" PRIx32 ") is larger than max_xfer (0x%" PRIx32 ")"
+acpi_nvdimm_read_label(uint32_t offset, uint32_t length) "Read Label Data: offset 0x%" PRIx32 " length 0x%" PRIx32
+acpi_nvdimm_write_label(uint32_t offset, uint32_t length) "Write Label Data: offset 0x%" PRIx32 " length 0x%" PRIx32
+acpi_nvdimm_read_io_port(void) "Alert: we never read _DSM IO Port"
+acpi_nvdimm_dsm_mem_addr(uint64_t dsm_mem_addr) "dsm memory address 0x%" PRIx64
+acpi_nvdimm_dsm_info(uint32_t revision, uint32_t handle, uint32_t function) "Revision 0x%" PRIx32 " Handle 0x%" PRIx32 " Function 0x%" PRIx32
+acpi_nvdimm_invalid_revision(uint32_t revision) "Revision 0x%" PRIx32 " is not supported, expect 0x1"
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index cad6f5ac41..0355bd3dda 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2024,7 +2024,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
build_srat_memory(table_data, mem_base, mem_len, i - 1,
MEM_AFFINITY_ENABLED);
}
- mem_base = 1ULL << 32;
+ mem_base = x86ms->above_4g_mem_start;
mem_len = next_base - x86ms->below_4g_mem_size;
next_base = mem_base + mem_len;
}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index d2b5823ffb..7280c02ce3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -814,10 +814,122 @@ void xen_load_linux(PCMachineState *pcms)
#define PC_ROM_ALIGN 0x800
#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA)
+static hwaddr pc_above_4g_end(PCMachineState *pcms)
+{
+ X86MachineState *x86ms = X86_MACHINE(pcms);
+
+ if (pcms->sgx_epc.size != 0) {
+ return sgx_epc_above_4g_end(&pcms->sgx_epc);
+ }
+
+ return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+}
+
+static void pc_get_device_memory_range(PCMachineState *pcms,
+ hwaddr *base,
+ ram_addr_t *device_mem_size)
+{
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+ MachineState *machine = MACHINE(pcms);
+ ram_addr_t size;
+ hwaddr addr;
+
+ size = machine->maxram_size - machine->ram_size;
+ addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
+
+ if (pcmc->enforce_aligned_dimm) {
+ /* size device region assuming 1G page max alignment per slot */
+ size += (1 * GiB) * machine->ram_slots;
+ }
+
+ *base = addr;
+ *device_mem_size = size;
+}
+
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+ hwaddr cxl_base;
+ ram_addr_t size;
+
+ if (pcmc->has_reserved_memory) {
+ pc_get_device_memory_range(pcms, &cxl_base, &size);
+ cxl_base += size;
+ } else {
+ cxl_base = pc_above_4g_end(pcms);
+ }
+
+ return cxl_base;
+}
+
+static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+{
+ uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
+
+ if (pcms->cxl_devices_state.fixed_windows) {
+ GList *it;
+
+ start = ROUND_UP(start, 256 * MiB);
+ for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ start += fw->size;
+ }
+ }
+
+ return start;
+}
+
+static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
+{
+ X86CPU *cpu = X86_CPU(first_cpu);
+
+ /* 32-bit systems don't have hole64 thus return max CPU address */
+ if (cpu->phys_bits <= 32) {
+ return ((hwaddr)1 << cpu->phys_bits) - 1;
+ }
+
+ return pc_pci_hole64_start() + pci_hole64_size - 1;
+}
+
+/*
+ * AMD systems with an IOMMU have an additional hole close to the
+ * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
+ * on kernel version, VFIO may or may not let you DMA map those ranges.
+ * Starting Linux v5.4 we validate it, and can't create guests on AMD machines
+ * with certain memory sizes. It's also wrong to use those IOVA ranges
+ * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
+ * The ranges reserved for Hyper-Transport are:
+ *
+ * FD_0000_0000h - FF_FFFF_FFFFh
+ *
+ * The ranges represent the following:
+ *
+ * Base Address Top Address Use
+ *
+ * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
+ * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
+ * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
+ * FD_F910_0000h FD_F91F_FFFFh System Management
+ * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
+ * FD_FB00_0000h FD_FBFF_FFFFh Address Translation
+ * FD_FC00_0000h FD_FDFF_FFFFh I/O Space
+ * FD_FE00_0000h FD_FFFF_FFFFh Configuration
+ * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
+ * FE_2000_0000h FF_FFFF_FFFFh Reserved
+ *
+ * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
+ * Table 3: Special Address Controls (GPA) for more information.
+ */
+#define AMD_HT_START 0xfd00000000UL
+#define AMD_HT_END 0xffffffffffUL
+#define AMD_ABOVE_1TB_START (AMD_HT_END + 1)
+#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START)
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
- MemoryRegion **ram_memory)
+ MemoryRegion **ram_memory,
+ uint64_t pci_hole64_size)
{
int linux_boot, i;
MemoryRegion *option_rom_mr;
@@ -827,7 +939,9 @@ void pc_memory_init(PCMachineState *pcms,
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
+ hwaddr maxphysaddr, maxusedaddr;
hwaddr cxl_base, cxl_resv_end = 0;
+ X86CPU *cpu = X86_CPU(first_cpu);
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@@ -835,6 +949,40 @@ void pc_memory_init(PCMachineState *pcms,
linux_boot = (machine->kernel_filename != NULL);
/*
+ * The HyperTransport range close to the 1T boundary is unique to AMD
+ * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
+ * to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in
+ * older machine types (<= 7.0) for compatibility purposes.
+ */
+ if (IS_AMD_CPU(&cpu->env) && pcmc->enforce_amd_1tb_hole) {
+ /* Bail out if max possible address does not cross HT range */
+ if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
+ x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
+ }
+
+ /*
+ * Advertise the HT region if address space covers the reserved
+ * region or if we relocate.
+ */
+ if (cpu->phys_bits >= 40) {
+ e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED);
+ }
+ }
+
+ /*
+ * phys-bits is required to be appropriately configured
+ * to make sure max used GPA is reachable.
+ */
+ maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
+ maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
+ if (maxphysaddr < maxusedaddr) {
+ error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
+ " phys-bits too low (%u)",
+ maxphysaddr, maxusedaddr, cpu->phys_bits);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
* Split single memory region and use aliases to address portions of it,
* done for backwards compatibility with older qemus.
*/
@@ -850,9 +998,10 @@ void pc_memory_init(PCMachineState *pcms,
machine->ram,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size);
- memory_region_add_subregion(system_memory, 0x100000000ULL,
+ memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
ram_above_4g);
- e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM);
+ e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
+ E820_RAM);
}
if (pcms->sgx_epc.size != 0) {
@@ -874,7 +1023,7 @@ void pc_memory_init(PCMachineState *pcms,
/* initialize device memory address space */
if (pcmc->has_reserved_memory &&
(machine->ram_size < machine->maxram_size)) {
- ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
+ ram_addr_t device_mem_size;
if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
error_report("unsupported amount of memory slots: %"PRIu64,
@@ -889,20 +1038,7 @@ void pc_memory_init(PCMachineState *pcms,
exit(EXIT_FAILURE);
}
- if (pcms->sgx_epc.size != 0) {
- machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- machine->device_memory->base =
- 0x100000000ULL + x86ms->above_4g_mem_size;
- }
-
- machine->device_memory->base =
- ROUND_UP(machine->device_memory->base, 1 * GiB);
-
- if (pcmc->enforce_aligned_dimm) {
- /* size device region assuming 1G page max alignment per slot */
- device_mem_size += (1 * GiB) * machine->ram_slots;
- }
+ pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size);
if ((machine->device_memory->base + device_mem_size) <
device_mem_size) {
@@ -921,17 +1057,7 @@ void pc_memory_init(PCMachineState *pcms,
MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
hwaddr cxl_size = MiB;
- if (pcmc->has_reserved_memory && machine->device_memory->base) {
- cxl_base = machine->device_memory->base;
- if (!pcmc->broken_reserved_end) {
- cxl_base += memory_region_size(&machine->device_memory->mr);
- }
- } else if (pcms->sgx_epc.size != 0) {
- cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
- }
-
+ cxl_base = pc_get_cxl_range_start(pcms);
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
@@ -1016,28 +1142,18 @@ uint64_t pc_pci_hole64_start(void)
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
- X86MachineState *x86ms = X86_MACHINE(pcms);
uint64_t hole64_start = 0;
+ ram_addr_t size = 0;
- if (pcms->cxl_devices_state.host_mr.addr) {
- hole64_start = pcms->cxl_devices_state.host_mr.addr +
- memory_region_size(&pcms->cxl_devices_state.host_mr);
- if (pcms->cxl_devices_state.fixed_windows) {
- GList *it;
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
- }
- }
- } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
- hole64_start = ms->device_memory->base;
+ if (pcms->cxl_devices_state.is_enabled) {
+ hole64_start = pc_get_cxl_range_end(pcms);
+ } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
+ pc_get_device_memory_range(pcms, &hole64_start, &size);
if (!pcmc->broken_reserved_end) {
- hole64_start += memory_region_size(&ms->device_memory->mr);
+ hole64_start += size;
}
- } else if (pcms->sgx_epc.size != 0) {
- hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size;
+ hole64_start = pc_above_4g_end(pcms);
}
return ROUND_UP(hole64_start, 1 * GiB);
@@ -1787,6 +1903,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->has_reserved_memory = true;
pcmc->kvmclock_enabled = true;
pcmc->enforce_aligned_dimm = true;
+ pcmc->enforce_amd_1tb_hole = true;
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
* to be used at the moment, 32K should be enough for a while. */
pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index fbf9465318..a5c65c1c35 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -91,6 +91,8 @@ static void pc_init1(MachineState *machine,
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
ram_addr_t lowmem;
+ uint64_t hole64_size;
+ DeviceState *i440fx_host;
/*
* Calculate ram split, for memory below and above 4G. It's a bit
@@ -164,9 +166,15 @@ static void pc_init1(MachineState *machine,
pci_memory = g_new(MemoryRegion, 1);
memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
rom_memory = pci_memory;
+ i440fx_host = qdev_new(host_type);
+ hole64_size = object_property_get_uint(OBJECT(i440fx_host),
+ PCI_HOST_PROP_PCI_HOLE64_SIZE,
+ &error_abort);
} else {
pci_memory = NULL;
rom_memory = system_memory;
+ i440fx_host = NULL;
+ hole64_size = 0;
}
pc_guest_info_init(pcms);
@@ -183,7 +191,7 @@ static void pc_init1(MachineState *machine,
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
pc_memory_init(pcms, system_memory,
- rom_memory, &ram_memory);
+ rom_memory, &ram_memory, hole64_size);
} else {
pc_system_flash_cleanup_unused(pcms);
if (machine->kernel_filename != NULL) {
@@ -200,8 +208,8 @@ static void pc_init1(MachineState *machine,
const char *type = xen_enabled() ? TYPE_PIIX3_XEN_DEVICE
: TYPE_PIIX3_DEVICE;
- pci_bus = i440fx_init(host_type,
- pci_type,
+ pci_bus = i440fx_init(pci_type,
+ i440fx_host,
system_memory, system_io, machine->ram_size,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size,
@@ -443,6 +451,7 @@ static void pc_i440fx_7_0_machine_options(MachineClass *m)
m->alias = NULL;
m->is_default = false;
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 12cc76aaf8..3a35193ff7 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine)
MachineClass *mc = MACHINE_GET_CLASS(machine);
bool acpi_pcihp;
bool keep_pci_slot_hpc;
+ uint64_t pci_hole64_size = 0;
/* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
* and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
@@ -203,12 +204,19 @@ static void pc_q35_init(MachineState *machine)
pcms->smbios_entry_point_type);
}
- /* allocate ram and load rom/bios */
- pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory);
-
/* create pci host bus */
q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
+ if (pcmc->pci_enabled) {
+ pci_hole64_size = object_property_get_uint(OBJECT(q35_host),
+ PCI_HOST_PROP_PCI_HOLE64_SIZE,
+ &error_abort);
+ }
+
+ /* allocate ram and load rom/bios */
+ pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory,
+ pci_hole64_size);
+
object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
OBJECT(ram_memory), NULL);
@@ -379,6 +387,7 @@ static void pc_q35_7_0_machine_options(MachineClass *m)
pc_q35_7_1_machine_options(m);
m->alias = NULL;
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index a44d66ba2a..09d9c7c73d 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
return;
}
- sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size;
+ sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX);
memory_region_add_subregion(get_system_memory(), sgx_epc->base,
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index ecea25d249..050eedc0c8 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1391,6 +1391,7 @@ static void x86_machine_initfn(Object *obj)
x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
x86ms->bus_lock_ratelimit = 0;
+ x86ms->above_4g_mem_start = 4 * GiB;
}
static void x86_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
index 1c5ad5f918..d5426ef4a5 100644
--- a/hw/pci-host/i440fx.c
+++ b/hw/pci-host/i440fx.c
@@ -237,7 +237,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp)
}
}
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+ DeviceState *dev,
MemoryRegion *address_space_mem,
MemoryRegion *address_space_io,
ram_addr_t ram_size,
@@ -246,7 +247,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type,
MemoryRegion *pci_address_space,
MemoryRegion *ram_memory)
{
- DeviceState *dev;
PCIBus *b;
PCIDevice *d;
PCIHostState *s;
@@ -254,7 +254,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type,
unsigned i;
I440FXState *i440fx;
- dev = qdev_new(host_type);
s = PCI_HOST_BRIDGE(dev);
b = pci_root_bus_new(dev, NULL, pci_address_space,
address_space_io, 0, TYPE_PCI_BUS);
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 281152d338..62e07ec2e4 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -197,6 +197,32 @@ static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
}
}
+static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
+ IOMMUTLBEvent *event,
+ hwaddr virt_start, hwaddr virt_end)
+{
+ uint64_t delta = virt_end - virt_start;
+
+ event->entry.iova = virt_start;
+ event->entry.addr_mask = delta;
+
+ if (delta == UINT64_MAX) {
+ memory_region_notify_iommu(mr, 0, *event);
+ }
+
+ while (virt_start != virt_end + 1) {
+ uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
+
+ event->entry.addr_mask = mask;
+ event->entry.iova = virt_start;
+ memory_region_notify_iommu(mr, 0, *event);
+ virt_start += mask + 1;
+ if (event->entry.perm != IOMMU_NONE) {
+ event->entry.translated_addr += mask + 1;
+ }
+ }
+}
+
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end, hwaddr paddr,
uint32_t flags)
@@ -215,19 +241,16 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
event.type = IOMMU_NOTIFIER_MAP;
event.entry.target_as = &address_space_memory;
- event.entry.addr_mask = virt_end - virt_start;
- event.entry.iova = virt_start;
event.entry.perm = perm;
event.entry.translated_addr = paddr;
- memory_region_notify_iommu(mr, 0, event);
+ virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end)
{
IOMMUTLBEvent event;
- uint64_t delta = virt_end - virt_start;
if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
return;
@@ -239,22 +262,8 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
event.entry.target_as = &address_space_memory;
event.entry.perm = IOMMU_NONE;
event.entry.translated_addr = 0;
- event.entry.addr_mask = delta;
- event.entry.iova = virt_start;
-
- if (delta == UINT64_MAX) {
- memory_region_notify_iommu(mr, 0, event);
- }
-
- while (virt_start != virt_end + 1) {
- uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
-
- event.entry.addr_mask = mask;
- event.entry.iova = virt_start;
- memory_region_notify_iommu(mr, 0, event);
- virt_start += mask + 1;
- }
+ virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
diff --git a/include/hw/boards.h b/include/hw/boards.h
index d94edcef28..7b416c9787 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -360,7 +360,6 @@ struct MachineState {
CpuTopology smp;
struct NVDIMMState *nvdimms_state;
struct NumaState *numa_state;
- CXLFixedMemoryWindowOptionsList *cfmws_list;
};
#define DEFINE_MACHINE(namestr, machine_initfn) \
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 70b5018156..94ec2f07d7 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -215,7 +215,7 @@ uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp);
static inline hwaddr cxl_decode_ig(int ig)
{
- return 1 << (ig + 8);
+ return 1ULL << (ig + 8);
}
CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 2a8ffbcfa8..8435733bd6 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -118,6 +118,7 @@ struct PCMachineClass {
bool has_reserved_memory;
bool enforce_aligned_dimm;
bool broken_reserved_end;
+ bool enforce_amd_1tb_hole;
/* generate legacy CPU hotplug AML */
bool legacy_cpu_hotplug;
@@ -162,7 +163,8 @@ void xen_load_linux(PCMachineState *pcms);
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
- MemoryRegion **ram_memory);
+ MemoryRegion **ram_memory,
+ uint64_t pci_hole64_size);
uint64_t pc_pci_hole64_start(void);
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
void pc_basic_device_init(struct PCMachineState *pcms,
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 6bdf1f6ab2..62fa5774f8 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -56,6 +56,9 @@ struct X86MachineState {
/* RAM information (sizes, addresses, configuration): */
ram_addr_t below_4g_mem_size, above_4g_mem_size;
+ /* Start address of the initial RAM above 4G */
+ uint64_t above_4g_mem_start;
+
/* CPU and apic information: */
bool apic_xrupt_override;
unsigned pci_irq_mask;
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index cf8f59be44..acf887c83d 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -29,14 +29,6 @@
#include "hw/acpi/aml-build.h"
#include "qom/object.h"
-#define NVDIMM_DEBUG 0
-#define nvdimm_debug(fmt, ...) \
- do { \
- if (NVDIMM_DEBUG) { \
- fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \
- } \
- } while (0)
-
/*
* The minimum label data size is required by NVDIMM Namespace
* specification, see the chapter 2 Namespaces:
diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h
index 52518dbf08..d02bf1ed6b 100644
--- a/include/hw/pci-host/i440fx.h
+++ b/include/hw/pci-host/i440fx.h
@@ -35,7 +35,8 @@ struct PCII440FXState {
#define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX"
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+ DeviceState *dev,
MemoryRegion *address_space_mem,
MemoryRegion *address_space_io,
ram_addr_t ram_size,