diff --git a/block/nbd-client.c b/block/nbd-client.c index 1e2952fdae..87d19c7253 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -28,6 +28,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "nbd-client.h" #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) @@ -70,10 +71,14 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) NBDClientSession *s = opaque; uint64_t i; int ret; + Error *local_err = NULL; for (;;) { assert(s->reply.handle == 0); - ret = nbd_receive_reply(s->ioc, &s->reply); + ret = nbd_receive_reply(s->ioc, &s->reply, &local_err); + if (ret < 0) { + error_report_err(local_err); + } if (ret <= 0) { break; } @@ -114,6 +119,10 @@ static int nbd_co_send_request(BlockDriverState *bs, int rc, ret, i; qemu_co_mutex_lock(&s->send_mutex); + while (s->in_flight == MAX_NBD_REQUESTS) { + qemu_co_queue_wait(&s->free_sema, &s->send_mutex); + } + s->in_flight++; for (i = 0; i < MAX_NBD_REQUESTS; i++) { if (s->recv_coroutine[i] == NULL) { @@ -136,7 +145,7 @@ static int nbd_co_send_request(BlockDriverState *bs, rc = nbd_send_request(s->ioc, request); if (rc >= 0) { ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - false); + false, NULL); if (ret != request->len) { rc = -EIO; } @@ -165,7 +174,7 @@ static void nbd_co_receive_reply(NBDClientSession *s, } else { if (qiov && reply->error == 0) { ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - true); + true, NULL); if (ret != request->len) { reply->error = EIO; } @@ -176,20 +185,6 @@ static void nbd_co_receive_reply(NBDClientSession *s, } } -static void nbd_coroutine_start(NBDClientSession *s, - NBDRequest *request) -{ - /* Poor man semaphore. The free_sema is locked when no other request - * can be accepted, and unlocked after receiving one reply. 
*/ - if (s->in_flight == MAX_NBD_REQUESTS) { - qemu_co_queue_wait(&s->free_sema, NULL); - assert(s->in_flight < MAX_NBD_REQUESTS); - } - s->in_flight++; - - /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ -} - static void nbd_coroutine_end(BlockDriverState *bs, NBDRequest *request) { @@ -197,13 +192,16 @@ static void nbd_coroutine_end(BlockDriverState *bs, int i = HANDLE_TO_INDEX(s, request->handle); s->recv_coroutine[i] = NULL; - s->in_flight--; - qemu_co_queue_next(&s->free_sema); /* Kick the read_reply_co to get the next reply. */ if (s->read_reply_co) { aio_co_wake(s->read_reply_co); } + + qemu_co_mutex_lock(&s->send_mutex); + s->in_flight--; + qemu_co_queue_next(&s->free_sema); + qemu_co_mutex_unlock(&s->send_mutex); } int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, @@ -221,7 +219,6 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, assert(bytes <= NBD_MAX_BUFFER_SIZE); assert(!flags); - nbd_coroutine_start(client, &request); ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; @@ -251,7 +248,6 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, assert(bytes <= NBD_MAX_BUFFER_SIZE); - nbd_coroutine_start(client, &request); ret = nbd_co_send_request(bs, &request, qiov); if (ret < 0) { reply.error = -ret; @@ -286,7 +282,6 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, request.flags |= NBD_CMD_FLAG_NO_HOLE; } - nbd_coroutine_start(client, &request); ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; @@ -311,7 +306,6 @@ int nbd_client_co_flush(BlockDriverState *bs) request.from = 0; request.len = 0; - nbd_coroutine_start(client, &request); ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; @@ -337,7 +331,6 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) return 0; } - nbd_coroutine_start(client, &request); ret = nbd_co_send_request(bs, &request, NULL); 
if (ret < 0) { reply.error = -ret; diff --git a/configure b/configure index 13e040d28c..71f5612a65 100755 --- a/configure +++ b/configure @@ -6027,11 +6027,11 @@ TARGET_ABI_DIR="" case "$target_name" in i386) - gdb_xml_files="i386-32bit-core.xml" + gdb_xml_files="i386-32bit.xml i386-32bit-core.xml i386-32bit-sse.xml" ;; x86_64) TARGET_BASE_ARCH=i386 - gdb_xml_files="i386-64bit-core.xml" + gdb_xml_files="i386-64bit.xml i386-64bit-core.xml i386-64bit-sse.xml" ;; alpha) mttcg="yes" diff --git a/cpus.c b/cpus.c index 6398439946..14bb8d552e 100644 --- a/cpus.c +++ b/cpus.c @@ -677,9 +677,9 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); qemu_mutex_unlock_iothread(); - atomic_set(&cpu->throttle_thread_scheduled, 0); g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ qemu_mutex_lock_iothread(); + atomic_set(&cpu->throttle_thread_scheduled, 0); } static void cpu_throttle_timer_tick(void *opaque) diff --git a/docs/ich9-ehci-uhci.cfg b/docs/config/ich9-ehci-uhci.cfg similarity index 100% rename from docs/ich9-ehci-uhci.cfg rename to docs/config/ich9-ehci-uhci.cfg diff --git a/docs/mach-virt-graphical.cfg b/docs/config/mach-virt-graphical.cfg similarity index 100% rename from docs/mach-virt-graphical.cfg rename to docs/config/mach-virt-graphical.cfg diff --git a/docs/mach-virt-serial.cfg b/docs/config/mach-virt-serial.cfg similarity index 100% rename from docs/mach-virt-serial.cfg rename to docs/config/mach-virt-serial.cfg diff --git a/docs/q35-emulated.cfg b/docs/config/q35-emulated.cfg similarity index 100% rename from docs/q35-emulated.cfg rename to docs/config/q35-emulated.cfg diff --git a/docs/q35-virtio-graphical.cfg b/docs/config/q35-virtio-graphical.cfg similarity index 100% rename from docs/q35-virtio-graphical.cfg rename to docs/config/q35-virtio-graphical.cfg diff --git a/docs/q35-virtio-serial.cfg b/docs/config/q35-virtio-serial.cfg similarity index 100% 
rename from docs/q35-virtio-serial.cfg rename to docs/config/q35-virtio-serial.cfg diff --git a/docs/atomics.txt b/docs/devel/atomics.txt similarity index 100% rename from docs/atomics.txt rename to docs/devel/atomics.txt diff --git a/docs/bitmaps.md b/docs/devel/bitmaps.md similarity index 100% rename from docs/bitmaps.md rename to docs/devel/bitmaps.md diff --git a/docs/blkdebug.txt b/docs/devel/blkdebug.txt similarity index 100% rename from docs/blkdebug.txt rename to docs/devel/blkdebug.txt diff --git a/docs/blkverify.txt b/docs/devel/blkverify.txt similarity index 100% rename from docs/blkverify.txt rename to docs/devel/blkverify.txt diff --git a/docs/build-system.txt b/docs/devel/build-system.txt similarity index 100% rename from docs/build-system.txt rename to docs/devel/build-system.txt diff --git a/docs/lockcnt.txt b/docs/devel/lockcnt.txt similarity index 100% rename from docs/lockcnt.txt rename to docs/devel/lockcnt.txt diff --git a/docs/memory.txt b/docs/devel/memory.txt similarity index 100% rename from docs/memory.txt rename to docs/devel/memory.txt diff --git a/docs/migration.txt b/docs/devel/migration.txt similarity index 100% rename from docs/migration.txt rename to docs/devel/migration.txt diff --git a/docs/multi-thread-tcg.txt b/docs/devel/multi-thread-tcg.txt similarity index 100% rename from docs/multi-thread-tcg.txt rename to docs/devel/multi-thread-tcg.txt diff --git a/docs/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt similarity index 100% rename from docs/multiple-iothreads.txt rename to docs/devel/multiple-iothreads.txt diff --git a/docs/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt similarity index 100% rename from docs/qapi-code-gen.txt rename to docs/devel/qapi-code-gen.txt diff --git a/docs/rcu.txt b/docs/devel/rcu.txt similarity index 100% rename from docs/rcu.txt rename to docs/devel/rcu.txt diff --git a/docs/tracing.txt b/docs/devel/tracing.txt similarity index 100% rename from docs/tracing.txt rename to 
docs/devel/tracing.txt diff --git a/docs/virtio-migration.txt b/docs/devel/virtio-migration.txt similarity index 100% rename from docs/virtio-migration.txt rename to docs/devel/virtio-migration.txt diff --git a/docs/writing-qmp-commands.txt b/docs/devel/writing-qmp-commands.txt similarity index 100% rename from docs/writing-qmp-commands.txt rename to docs/devel/writing-qmp-commands.txt diff --git a/docs/aio_notify.promela b/docs/spin/aio_notify.promela similarity index 100% rename from docs/aio_notify.promela rename to docs/spin/aio_notify.promela diff --git a/docs/aio_notify_accept.promela b/docs/spin/aio_notify_accept.promela similarity index 100% rename from docs/aio_notify_accept.promela rename to docs/spin/aio_notify_accept.promela diff --git a/docs/aio_notify_bug.promela b/docs/spin/aio_notify_bug.promela similarity index 100% rename from docs/aio_notify_bug.promela rename to docs/spin/aio_notify_bug.promela diff --git a/docs/tcg-exclusive.promela b/docs/spin/tcg-exclusive.promela similarity index 100% rename from docs/tcg-exclusive.promela rename to docs/spin/tcg-exclusive.promela diff --git a/docs/win32-qemu-event.promela b/docs/spin/win32-qemu-event.promela similarity index 100% rename from docs/win32-qemu-event.promela rename to docs/spin/win32-qemu-event.promela diff --git a/exec.c b/exec.c index b1db12fe36..a93e209625 100644 --- a/exec.c +++ b/exec.c @@ -374,10 +374,11 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, int128_getlo(section->size), addr); } -static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr, - Node *nodes, MemoryRegionSection *sections) +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) { - PhysPageEntry *p; + PhysPageEntry lp = d->phys_map, *p; + Node *nodes = d->map.nodes; + MemoryRegionSection *sections = d->map.sections; hwaddr index = addr >> TARGET_PAGE_BITS; int i; @@ -415,8 +416,7 @@ static MemoryRegionSection 
*address_space_lookup_region(AddressSpaceDispatch *d, section_covers_addr(section, addr)) { update = false; } else { - section = phys_page_find(d->phys_map, addr, d->map.nodes, - d->map.sections); + section = phys_page_find(d, addr); update = true; } if (resolve_subpage && section->mr->subpage) { @@ -1285,8 +1285,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti subpage_t *subpage; hwaddr base = section->offset_within_address_space & TARGET_PAGE_MASK; - MemoryRegionSection *existing = phys_page_find(d->phys_map, base, - d->map.nodes, d->map.sections); + MemoryRegionSection *existing = phys_page_find(d, base); MemoryRegionSection subsection = { .offset_within_address_space = base, .size = int128_make64(TARGET_PAGE_SIZE), diff --git a/gdb-xml/i386-32bit-sse.xml b/gdb-xml/i386-32bit-sse.xml new file mode 100644 index 0000000000..57678473d6 --- /dev/null +++ b/gdb-xml/i386-32bit-sse.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gdb-xml/i386-32bit.xml b/gdb-xml/i386-32bit.xml new file mode 100644 index 0000000000..956fc7f45f --- /dev/null +++ b/gdb-xml/i386-32bit.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/gdb-xml/i386-64bit-sse.xml b/gdb-xml/i386-64bit-sse.xml new file mode 100644 index 0000000000..e86efc9ce5 --- /dev/null +++ b/gdb-xml/i386-64bit-sse.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gdb-xml/i386-64bit.xml b/gdb-xml/i386-64bit.xml new file mode 100644 index 0000000000..0b2f00ccbe --- /dev/null +++ b/gdb-xml/i386-64bit.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 13eca374cd..363d1b5743 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -19,6 +19,7 @@ #include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" +#include "sysemu/hw_accel.h" 
#include "kvm_i386.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" @@ -69,6 +70,8 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) uint64_t nsec_hi; uint64_t nsec; + cpu_synchronize_state(cpu); + if (!(env->system_time_msr & 1ULL)) { /* KVM clock not active */ return 0; diff --git a/hw/misc/edu.c b/hw/misc/edu.c index 401039c100..01acacf142 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -343,6 +343,12 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) EduState *edu = DO_UPCAST(EduState, pdev, pdev); uint8_t *pci_conf = pdev->config; + pci_config_set_interrupt_pin(pci_conf, 1); + + if (msi_init(pdev, 0, 1, true, false, errp)) { + return; + } + timer_init_ms(&edu->dma_timer, QEMU_CLOCK_VIRTUAL, edu_dma_timer, edu); qemu_mutex_init(&edu->thr_mutex); @@ -350,12 +356,6 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) qemu_thread_create(&edu->thread, "edu", edu_fact_thread, edu, QEMU_THREAD_JOINABLE); - pci_config_set_interrupt_pin(pci_conf, 1); - - if (msi_init(pdev, 0, 1, true, false, errp)) { - return; - } - memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu, "edu-mmio", 1 << 20); pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio); diff --git a/hw/pci/msix.c b/hw/pci/msix.c index bb54e8b0ac..fc5fe511b3 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -22,6 +22,7 @@ #include "hw/xen/xen.h" #include "qemu/range.h" #include "qapi/error.h" +#include "trace.h" #define MSIX_CAP_LENGTH 12 @@ -130,10 +131,14 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) } } +static bool msix_masked(PCIDevice *dev) +{ + return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; +} + static void msix_update_function_masked(PCIDevice *dev) { - dev->msix_function_masked = !msix_enabled(dev) || - (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); + dev->msix_function_masked = !msix_enabled(dev) || msix_masked(dev); } /* Handle MSI-X capability 
config write. */ @@ -148,6 +153,8 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, return; } + trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev)); + was_masked = dev->msix_function_masked; msix_update_function_masked(dev); diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 2b9cf24405..83c8f5ace7 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -7,3 +7,6 @@ pci_update_mappings_add(void *d, uint32_t bus, uint32_t slot, uint32_t func, int # hw/pci/pci_host.c pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" + +# hw/pci/msix.c +msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d masked %d" diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 46a3e3f280..f46f06d055 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -918,6 +918,9 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp) static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp) { + VirtIOSCSI *s = VIRTIO_SCSI(dev); + + qbus_set_hotplug_handler(BUS(&s->bus), NULL, &error_abort); virtio_scsi_common_unrealize(dev, errp); } diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 93de3e1cc5..1b8d3d7d4c 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -112,7 +112,6 @@ static uint64_t get_guest_rtc_ns(RTCState *s) guest_clock - s->last_update + s->offset; } -#ifdef TARGET_I386 static void rtc_coalesced_timer_update(RTCState *s) { if (s->irq_coalesced == 0) { @@ -121,21 +120,39 @@ static void rtc_coalesced_timer_update(RTCState *s) /* divide each RTC interval to 2 - 8 smaller intervals */ int c = MIN(s->irq_coalesced, 7) + 1; int64_t next_clock = qemu_clock_get_ns(rtc_clock) + - muldiv64(s->period / c, NANOSECONDS_PER_SECOND, RTC_CLOCK_RATE); + periodic_clock_to_ns(s->period 
/ c); timer_mod(s->coalesced_timer, next_clock); } } +static QLIST_HEAD(, RTCState) rtc_devices = + QLIST_HEAD_INITIALIZER(rtc_devices); + +#ifdef TARGET_I386 +void qmp_rtc_reset_reinjection(Error **errp) +{ + RTCState *s; + + QLIST_FOREACH(s, &rtc_devices, link) { + s->irq_coalesced = 0; + } +} + +static bool rtc_policy_slew_deliver_irq(RTCState *s) +{ + apic_reset_irq_delivered(); + qemu_irq_raise(s->irq); + return apic_get_irq_delivered(); +} + static void rtc_coalesced_timer(void *opaque) { RTCState *s = opaque; if (s->irq_coalesced != 0) { - apic_reset_irq_delivered(); s->cmos_data[RTC_REG_C] |= 0xc0; DPRINTF_C("cmos: injecting from timer\n"); - qemu_irq_raise(s->irq); - if (apic_get_irq_delivered()) { + if (rtc_policy_slew_deliver_irq(s)) { s->irq_coalesced--; DPRINTF_C("cmos: coalesced irqs decreased to %d\n", s->irq_coalesced); @@ -144,40 +161,101 @@ static void rtc_coalesced_timer(void *opaque) rtc_coalesced_timer_update(s); } +#else +static bool rtc_policy_slew_deliver_irq(RTCState *s) +{ + assert(0); + return false; +} #endif -/* handle periodic timer */ -static void periodic_timer_update(RTCState *s, int64_t current_time) +static uint32_t rtc_periodic_clock_ticks(RTCState *s) { - int period_code, period; - int64_t cur_clock, next_irq_clock; + int period_code; + + if (!(s->cmos_data[RTC_REG_B] & REG_B_PIE)) { + return 0; + } period_code = s->cmos_data[RTC_REG_A] & 0x0f; - if (period_code != 0 - && (s->cmos_data[RTC_REG_B] & REG_B_PIE)) { - if (period_code <= 2) - period_code += 7; - /* period in 32 Khz cycles */ - period = 1 << (period_code - 1); -#ifdef TARGET_I386 - if (period != s->period) { - s->irq_coalesced = (s->irq_coalesced * s->period) / period; - DPRINTF_C("cmos: coalesced irqs scaled to %d\n", s->irq_coalesced); - } - s->period = period; -#endif + + return periodic_period_to_clock(period_code); +} + +/* + * handle periodic timer. @old_period indicates the periodic timer update + * is just due to period adjustment. 
+ */ +static void +periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) +{ + uint32_t period; + int64_t cur_clock, next_irq_clock, lost_clock = 0; + + period = rtc_periodic_clock_ticks(s); + + if (period) { /* compute 32 khz clock */ cur_clock = muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); - next_irq_clock = (cur_clock & ~(period - 1)) + period; - s->next_periodic_time = muldiv64(next_irq_clock, NANOSECONDS_PER_SECOND, - RTC_CLOCK_RATE) + 1; + /* + * if the periodic timer's update is due to period re-configuration, + * we should count the clock since last interrupt. + */ + if (old_period) { + int64_t last_periodic_clock, next_periodic_clock; + + next_periodic_clock = muldiv64(s->next_periodic_time, + RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); + last_periodic_clock = next_periodic_clock - old_period; + lost_clock = cur_clock - last_periodic_clock; + assert(lost_clock >= 0); + } + + /* + * s->irq_coalesced can change for two reasons: + * + * a) if one or more periodic timer interrupts have been lost, + * lost_clock will be more that a period. + * + * b) when the period may be reconfigured, we expect the OS to + * treat delayed tick as the new period. So, when switching + * from a shorter to a longer period, scale down the missing, + * because the OS will treat past delayed ticks as longer + * (leftovers are put back into lost_clock). When switching + * to a shorter period, scale up the missing ticks since the + * OS handler will treat past delayed ticks as shorter. 
+ */ + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { + uint32_t old_irq_coalesced = s->irq_coalesced; + + s->period = period; + lost_clock += old_irq_coalesced * old_period; + s->irq_coalesced = lost_clock / s->period; + lost_clock %= s->period; + if (old_irq_coalesced != s->irq_coalesced || + old_period != s->period) { + DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, " + "period scaled from %d to %d\n", old_irq_coalesced, + s->irq_coalesced, old_period, s->period); + rtc_coalesced_timer_update(s); + } + } else { + /* + * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW + * is not used, we should make the time progress anyway. + */ + lost_clock = MIN(lost_clock, period); + } + + assert(lost_clock >= 0 && lost_clock <= period); + + next_irq_clock = cur_clock + period - lost_clock; + s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1; timer_mod(s->periodic_timer, s->next_periodic_time); } else { -#ifdef TARGET_I386 s->irq_coalesced = 0; -#endif timer_del(s->periodic_timer); } } @@ -186,25 +264,21 @@ static void rtc_periodic_timer(void *opaque) { RTCState *s = opaque; - periodic_timer_update(s, s->next_periodic_time); + periodic_timer_update(s, s->next_periodic_time, 0); s->cmos_data[RTC_REG_C] |= REG_C_PF; if (s->cmos_data[RTC_REG_B] & REG_B_PIE) { s->cmos_data[RTC_REG_C] |= REG_C_IRQF; -#ifdef TARGET_I386 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { if (s->irq_reinject_on_ack_count >= RTC_REINJECT_ON_ACK_COUNT) - s->irq_reinject_on_ack_count = 0; - apic_reset_irq_delivered(); - qemu_irq_raise(s->irq); - if (!apic_get_irq_delivered()) { + s->irq_reinject_on_ack_count = 0; + if (!rtc_policy_slew_deliver_irq(s)) { s->irq_coalesced++; rtc_coalesced_timer_update(s); DPRINTF_C("cmos: coalesced irqs increased to %d\n", s->irq_coalesced); } } else -#endif - qemu_irq_raise(s->irq); + qemu_irq_raise(s->irq); } } @@ -391,6 +465,8 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { RTCState *s 
= opaque; + uint32_t old_period; + bool update_periodic_timer; if ((addr & 1) == 0) { s->cmos_index = data & 0x7f; @@ -423,6 +499,9 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, } break; case RTC_REG_A: + update_periodic_timer = (s->cmos_data[RTC_REG_A] ^ data) & 0x0f; + old_period = rtc_periodic_clock_ticks(s); + if ((data & 0x60) == 0x60) { if (rtc_running(s)) { rtc_update_time(s); @@ -445,10 +524,19 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, /* UIP bit is read only */ s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) | (s->cmos_data[RTC_REG_A] & REG_A_UIP); - periodic_timer_update(s, qemu_clock_get_ns(rtc_clock)); + + if (update_periodic_timer) { + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), + old_period); + } + check_update_timer(s); break; case RTC_REG_B: + update_periodic_timer = (s->cmos_data[RTC_REG_B] ^ data) + & REG_B_PIE; + old_period = rtc_periodic_clock_ticks(s); + if (data & REG_B_SET) { /* update cmos to when the rtc was stopping */ if (rtc_running(s)) { @@ -475,7 +563,12 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, qemu_irq_lower(s->irq); } s->cmos_data[RTC_REG_B] = data; - periodic_timer_update(s, qemu_clock_get_ns(rtc_clock)); + + if (update_periodic_timer) { + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), + old_period); + } + check_update_timer(s); break; case RTC_REG_C: @@ -529,20 +622,6 @@ static void rtc_get_time(RTCState *s, struct tm *tm) rtc_from_bcd(s, s->cmos_data[RTC_CENTURY]) * 100 - 1900; } -static QLIST_HEAD(, RTCState) rtc_devices = - QLIST_HEAD_INITIALIZER(rtc_devices); - -#ifdef TARGET_I386 -void qmp_rtc_reset_reinjection(Error **errp) -{ - RTCState *s; - - QLIST_FOREACH(s, &rtc_devices, link) { - s->irq_coalesced = 0; - } -} -#endif - static void rtc_set_time(RTCState *s) { struct tm tm; @@ -662,22 +741,19 @@ static uint64_t cmos_ioport_read(void *opaque, hwaddr addr, if (ret & (REG_C_UF | REG_C_AF)) { check_update_timer(s); } -#ifdef TARGET_I386 + if(s->irq_coalesced 
&& (s->cmos_data[RTC_REG_B] & REG_B_PIE) && s->irq_reinject_on_ack_count < RTC_REINJECT_ON_ACK_COUNT) { s->irq_reinject_on_ack_count++; s->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_PF; - apic_reset_irq_delivered(); DPRINTF_C("cmos: injecting on ack\n"); - qemu_irq_raise(s->irq); - if (apic_get_irq_delivered()) { + if (rtc_policy_slew_deliver_irq(s)) { s->irq_coalesced--; DPRINTF_C("cmos: coalesced irqs decreased to %d\n", s->irq_coalesced); } } -#endif break; default: ret = s->cmos_data[s->cmos_index]; @@ -743,17 +819,15 @@ static int rtc_post_load(void *opaque, int version_id) uint64_t now = qemu_clock_get_ns(rtc_clock); if (now < s->next_periodic_time || now > (s->next_periodic_time + get_max_clock_jump())) { - periodic_timer_update(s, qemu_clock_get_ns(rtc_clock)); + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 0); } } -#ifdef TARGET_I386 if (version_id >= 2) { if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { rtc_coalesced_timer_update(s); } } -#endif return 0; } @@ -808,13 +882,12 @@ static void rtc_notify_clock_reset(Notifier *notifier, void *data) int64_t now = *(int64_t *)data; rtc_set_date_from_host(ISA_DEVICE(s)); - periodic_timer_update(s, now); + periodic_timer_update(s, now, 0); check_update_timer(s); -#ifdef TARGET_I386 + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { rtc_coalesced_timer_update(s); } -#endif } /* set CMOS shutdown status register (index 0xF) as S3_resume(0xFE) @@ -835,12 +908,10 @@ static void rtc_reset(void *opaque) qemu_irq_lower(s->irq); -#ifdef TARGET_I386 if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { s->irq_coalesced = 0; s->irq_reinject_on_ack_count = 0; } -#endif } static const MemoryRegionOps cmos_ops = { @@ -886,19 +957,19 @@ static void rtc_realizefn(DeviceState *dev, Error **errp) rtc_set_date_from_host(isadev); -#ifdef TARGET_I386 switch (s->lost_tick_policy) { +#ifdef TARGET_I386 case LOST_TICK_POLICY_SLEW: s->coalesced_timer = timer_new_ns(rtc_clock, rtc_coalesced_timer, s); break; +#endif case 
LOST_TICK_POLICY_DISCARD: break; default: error_setg(errp, "Invalid lost tick policy."); return; } -#endif s->periodic_timer = timer_new_ns(rtc_clock, rtc_periodic_timer, s); s->update_timer = timer_new_ns(rtc_clock, rtc_update_timer, s); diff --git a/include/block/nbd.h b/include/block/nbd.h index 0ed077502e..416257abca 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -127,14 +127,16 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, - bool do_read); + bool do_read, + Error **errp); int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, off_t *size, Error **errp); -int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size); +int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size, + Error **errp); ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request); -ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply); +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp); int nbd_client(int fd); int nbd_disconnect(int fd); diff --git a/include/hw/timer/mc146818rtc_regs.h b/include/hw/timer/mc146818rtc_regs.h index 6ede6c832e..c62f17bf2d 100644 --- a/include/hw/timer/mc146818rtc_regs.h +++ b/include/hw/timer/mc146818rtc_regs.h @@ -65,4 +65,24 @@ #define REG_C_AF 0x20 #define REG_C_MASK 0x70 +static inline uint32_t periodic_period_to_clock(int period_code) +{ + if (!period_code) { + return 0; + } + + if (period_code <= 2) { + period_code += 7; + } + /* period in 32 Khz cycles */ + return 1 << (period_code - 1); +} + +#define RTC_CLOCK_RATE 32768 + +static inline int64_t periodic_clock_to_ns(int64_t clocks) +{ + return muldiv64(clocks, NANOSECONDS_PER_SECOND, RTC_CLOCK_RATE); +} + #endif diff --git a/kvm-all.c b/kvm-all.c index 494b9256aa..44b3cf43cc 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1144,6 +1144,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) } 
clear_gsi(s, virq); kvm_arch_release_virq_post(virq); + trace_kvm_irqchip_release_virq(virq); } static unsigned int kvm_hash_msi(uint32_t data) @@ -1287,7 +1288,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) return -EINVAL; } - trace_kvm_irqchip_add_msi_route(virq); + trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", + vector, virq); kvm_add_routing_entry(s, &kroute); kvm_arch_add_msi_route_post(&kroute, vector, dev); @@ -1746,6 +1748,8 @@ static int kvm_init(MachineState *ms) kvm_ioeventfd_any_length_allowed = (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0); + kvm_state = s; + ret = kvm_arch_init(ms, s); if (ret < 0) { goto err; @@ -1755,8 +1759,6 @@ static int kvm_init(MachineState *ms) kvm_irqchip_create(ms, s); } - kvm_state = s; - if (kvm_eventfds_allowed) { s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; diff --git a/nbd/client.c b/nbd/client.c index a58fb02cb4..595d99ed30 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -86,9 +86,9 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ -/* Discard length bytes from channel. Return -errno on failure, or - * the amount of bytes consumed. */ -static ssize_t drop_sync(QIOChannel *ioc, size_t size) +/* Discard length bytes from channel. Return -errno on failure and 0 on + * success*/ +static int drop_sync(QIOChannel *ioc, size_t size, Error **errp) { ssize_t ret = 0; char small[1024]; @@ -96,14 +96,13 @@ static ssize_t drop_sync(QIOChannel *ioc, size_t size) buffer = sizeof(small) >= size ? 
small : g_malloc(MIN(65536, size)); while (size > 0) { - ssize_t count = read_sync(ioc, buffer, MIN(65536, size)); + ssize_t count = MIN(65536, size); + ret = read_sync(ioc, buffer, MIN(65536, size), errp); - if (count <= 0) { + if (ret < 0) { goto cleanup; } - assert(count <= size); size -= count; - ret += count; } cleanup: @@ -136,13 +135,13 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, stl_be_p(&req.option, opt); stl_be_p(&req.length, len); - if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) { - error_setg(errp, "Failed to send option request header"); + if (write_sync(ioc, &req, sizeof(req), errp) < 0) { + error_prepend(errp, "Failed to send option request header"); return -1; } - if (len && write_sync(ioc, (char *) data, len) != len) { - error_setg(errp, "Failed to send option request data"); + if (len && write_sync(ioc, (char *) data, len, errp) < 0) { + error_prepend(errp, "Failed to send option request data"); return -1; } @@ -170,8 +169,8 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, nbd_opt_reply *reply, Error **errp) { QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); - if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { - error_setg(errp, "failed to read option reply"); + if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) { + error_prepend(errp, "failed to read option reply"); nbd_send_opt_abort(ioc); return -1; } @@ -219,8 +218,8 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, goto cleanup; } msg = g_malloc(reply->length + 1); - if (read_sync(ioc, msg, reply->length) != reply->length) { - error_setg(errp, "failed to read option error message"); + if (read_sync(ioc, msg, reply->length, errp) < 0) { + error_prepend(errp, "failed to read option error message"); goto cleanup; } msg[reply->length] = '\0'; @@ -321,8 +320,8 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &namelen, 
sizeof(namelen)) != sizeof(namelen)) { - error_setg(errp, "failed to read option name length"); + if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) { + error_prepend(errp, "failed to read option name length"); nbd_send_opt_abort(ioc); return -1; } @@ -334,8 +333,8 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, return -1; } if (namelen != strlen(want)) { - if (drop_sync(ioc, len) != len) { - error_setg(errp, "failed to skip export name with wrong length"); + if (drop_sync(ioc, len, errp) < 0) { + error_prepend(errp, "failed to skip export name with wrong length"); nbd_send_opt_abort(ioc); return -1; } @@ -343,15 +342,15 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, } assert(namelen < sizeof(name)); - if (read_sync(ioc, name, namelen) != namelen) { - error_setg(errp, "failed to read export name"); + if (read_sync(ioc, name, namelen, errp) < 0) { + error_prepend(errp, "failed to read export name"); nbd_send_opt_abort(ioc); return -1; } name[namelen] = '\0'; len -= namelen; - if (drop_sync(ioc, len) != len) { - error_setg(errp, "failed to read export description"); + if (drop_sync(ioc, len, errp) < 0) { + error_prepend(errp, "failed to read export description"); nbd_send_opt_abort(ioc); return -1; } @@ -477,8 +476,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, buf, 8) != 8) { - error_setg(errp, "Failed to read data"); + if (read_sync(ioc, buf, 8, errp) < 0) { + error_prepend(errp, "Failed to read data"); goto fail; } @@ -503,8 +502,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to read magic"); + if (read_sync(ioc, &magic, sizeof(magic), errp) < 0) { + error_prepend(errp, "Failed to read magic"); goto fail; } magic = be64_to_cpu(magic); @@ -515,9 +514,8 @@ int nbd_receive_negotiate(QIOChannel 
*ioc, const char *name, uint16_t *flags, uint16_t globalflags; bool fixedNewStyle = false; - if (read_sync(ioc, &globalflags, sizeof(globalflags)) != - sizeof(globalflags)) { - error_setg(errp, "Failed to read server flags"); + if (read_sync(ioc, &globalflags, sizeof(globalflags), errp) < 0) { + error_prepend(errp, "Failed to read server flags"); goto fail; } globalflags = be16_to_cpu(globalflags); @@ -534,9 +532,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* client requested flags */ clientflags = cpu_to_be32(clientflags); - if (write_sync(ioc, &clientflags, sizeof(clientflags)) != - sizeof(clientflags)) { - error_setg(errp, "Failed to send clientflags field"); + if (write_sync(ioc, &clientflags, sizeof(clientflags), errp) < 0) { + error_prepend(errp, "Failed to send clientflags field"); goto fail; } if (tlscreds) { @@ -573,14 +570,14 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* Read the response */ - if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { - error_setg(errp, "Failed to read export length"); + if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); - if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) { - error_setg(errp, "Failed to read export flags"); + if (read_sync(ioc, flags, sizeof(*flags), errp) < 0) { + error_prepend(errp, "Failed to read export flags"); goto fail; } be16_to_cpus(flags); @@ -596,15 +593,15 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { - error_setg(errp, "Failed to read export length"); + if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); TRACE("Size is %" PRIu64, *size); - if (read_sync(ioc, &oldflags, sizeof(oldflags)) != sizeof(oldflags)) { - error_setg(errp, "Failed to 
read export flags"); + if (read_sync(ioc, &oldflags, sizeof(oldflags), errp) < 0) { + error_prepend(errp, "Failed to read export flags"); goto fail; } be32_to_cpus(&oldflags); @@ -619,8 +616,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (zeroes && drop_sync(ioc, 124) != 124) { - error_setg(errp, "Failed to read reserved block"); + if (zeroes && drop_sync(ioc, 124, errp) < 0) { + error_prepend(errp, "Failed to read reserved block"); goto fail; } rc = 0; @@ -630,11 +627,13 @@ fail: } #ifdef __linux__ -int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) +int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size, + Error **errp) { unsigned long sectors = size / BDRV_SECTOR_SIZE; if (size / BDRV_SECTOR_SIZE != sectors) { - LOG("Export size %lld too large for 32-bit kernel", (long long) size); + error_setg(errp, "Export size %lld too large for 32-bit kernel", + (long long) size); return -E2BIG; } @@ -642,7 +641,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) { int serrno = errno; - LOG("Failed to set NBD socket"); + error_setg(errp, "Failed to set NBD socket"); return -serrno; } @@ -650,7 +649,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) { int serrno = errno; - LOG("Failed setting NBD block size"); + error_setg(errp, "Failed setting NBD block size"); return -serrno; } @@ -662,7 +661,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) { int serrno = errno; - LOG("Failed setting size (in blocks)"); + error_setg(errp, "Failed setting size (in blocks)"); return -serrno; } @@ -673,12 +672,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) if 
(ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { int serrno = errno; - LOG("Failed setting read-only attribute"); + error_setg(errp, "Failed setting read-only attribute"); return -serrno; } } else { int serrno = errno; - LOG("Failed setting flags"); + error_setg(errp, "Failed setting flags"); return -serrno; } } @@ -726,8 +725,10 @@ int nbd_disconnect(int fd) } #else -int nbd_init(int fd, QIOChannelSocket *ioc, uint16_t flags, off_t size) +int nbd_init(int fd, QIOChannelSocket *ioc, uint16_t flags, off_t size, + Error **errp) { + error_setg(errp, "nbd_init is only supported on Linux"); return -ENOTSUP; } @@ -744,7 +745,6 @@ int nbd_disconnect(int fd) ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; - ssize_t ret; TRACE("Sending request to server: " "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 @@ -759,31 +759,22 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); - ret = write_sync(ioc, buf, sizeof(buf)); - if (ret < 0) { - return ret; - } - - if (ret != sizeof(buf)) { - LOG("writing to socket failed"); - return -EINVAL; - } - return 0; + return write_sync(ioc, buf, sizeof(buf), NULL); } -ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; ssize_t ret; - ret = read_sync(ioc, buf, sizeof(buf)); + ret = read_sync_eof(ioc, buf, sizeof(buf), errp); if (ret <= 0) { return ret; } if (ret != sizeof(buf)) { - LOG("read failed"); + error_setg(errp, "read failed"); return -EINVAL; } @@ -801,7 +792,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) if (reply->error == ESHUTDOWN) { /* This works even on mingw which lacks a native ESHUTDOWN */ - LOG("server shutting down"); + error_setg(errp, "server shutting down"); return -EINVAL; } TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = 
% " PRId32 @@ -809,7 +800,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) magic, reply->error, reply->handle); if (magic != NBD_REPLY_MAGIC) { - LOG("invalid magic (got 0x%" PRIx32 ")", magic); + error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); return -EINVAL; } return sizeof(buf); diff --git a/nbd/common.c b/nbd/common.c index dccbb8e9de..bd81637ab9 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -20,14 +20,18 @@ #include "qapi/error.h" #include "nbd-internal.h" +/* nbd_wr_syncv + * The function may be called from coroutine or from non-coroutine context. + * When called from non-coroutine context @ioc must be in blocking mode. + */ ssize_t nbd_wr_syncv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, - bool do_read) + bool do_read, + Error **errp) { ssize_t done = 0; - Error *local_err = NULL; struct iovec *local_iov = g_new(struct iovec, niov); struct iovec *local_iov_head = local_iov; unsigned int nlocal_iov = niov; @@ -37,22 +41,17 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc, while (nlocal_iov > 0) { ssize_t len; if (do_read) { - len = qio_channel_readv(ioc, local_iov, nlocal_iov, &local_err); + len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp); } else { - len = qio_channel_writev(ioc, local_iov, nlocal_iov, &local_err); + len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp); } if (len == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT); - } else { - return -EAGAIN; - } + /* errp should not be set */ + assert(qemu_in_coroutine()); + qio_channel_yield(ioc, do_read ? 
G_IO_IN : G_IO_OUT); continue; } if (len < 0) { - TRACE("I/O error: %s", error_get_pretty(local_err)); - error_free(local_err); - /* XXX handle Error objects */ done = -EIO; goto cleanup; } diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index f43d990a05..d6071640a0 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -94,7 +94,14 @@ #define NBD_ENOSPC 28 #define NBD_ESHUTDOWN 108 -static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) +/* read_sync_eof + * Tries to read @size bytes from @ioc. Returns number of bytes actually read. + * May return a value >= 0 and < size only on EOF, i.e. when iteratively called + * qio_channel_readv() returns 0. So, there is no need to call read_sync_eof + * iteratively. + */ +static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = buffer, .iov_len = size }; /* Sockets are kept in blocking mode in the negotiation phase. After @@ -102,15 +109,38 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) * our request/reply. Synchronization is done with recv_coroutine, so * that this is coroutine-safe. */ - return nbd_wr_syncv(ioc, &iov, 1, size, true); + return nbd_wr_syncv(ioc, &iov, 1, size, true, errp); } -static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer, - size_t size) +/* read_sync + * Reads @size bytes from @ioc. Returns 0 on success. + */ +static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) +{ + ssize_t ret = read_sync_eof(ioc, buffer, size, errp); + + if (ret >= 0 && ret != size) { + ret = -EINVAL; + error_setg(errp, "End of file"); + } + + return ret < 0 ? ret : 0; +} + +/* write_sync + * Writes @size bytes to @ioc. Returns 0 on success.
+ */ +static inline int write_sync(QIOChannel *ioc, const void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; - return nbd_wr_syncv(ioc, &iov, 1, size, false); + ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, errp); + + assert(ret < 0 || ret == size); + + return ret < 0 ? ret : 0; } struct NBDTLSHandshakeData { diff --git a/nbd/server.c b/nbd/server.c index 924a1fe2db..49b55f6ede 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -112,7 +112,7 @@ static gboolean nbd_negotiate_continue(QIOChannel *ioc, return TRUE; } -static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) +static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) { ssize_t ret; guint watch; @@ -124,14 +124,13 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) nbd_negotiate_continue, qemu_coroutine_self(), NULL); - ret = read_sync(ioc, buffer, size); + ret = read_sync(ioc, buffer, size, NULL); g_source_remove(watch); return ret; } -static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer, - size_t size) +static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size) { ssize_t ret; guint watch; @@ -143,29 +142,29 @@ static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer, nbd_negotiate_continue, qemu_coroutine_self(), NULL); - ret = write_sync(ioc, buffer, size); + ret = write_sync(ioc, buffer, size, NULL); g_source_remove(watch); return ret; } -static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) +static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) { - ssize_t ret, dropped = size; + ssize_t ret; uint8_t *buffer = g_malloc(MIN(65536, size)); while (size > 0) { - ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size)); + size_t count = MIN(65536, size); + ret = nbd_negotiate_read(ioc, buffer, count); if (ret < 0) { g_free(buffer); return ret; } - assert(ret <= size); - size -= ret; + size -= 
count; } g_free(buffer); - return dropped; + return 0; } /* Basic flow for negotiation @@ -206,22 +205,22 @@ static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { + if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) { LOG("write failed (rep magic)"); return -EINVAL; } opt = cpu_to_be32(opt); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { + if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) { LOG("write failed (rep opt)"); return -EINVAL; } type = cpu_to_be32(type); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { + if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) { LOG("write failed (rep type)"); return -EINVAL; } len = cpu_to_be32(len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { + if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { LOG("write failed (rep data length)"); return -EINVAL; } @@ -256,7 +255,7 @@ nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, if (ret < 0) { goto out; } - if (nbd_negotiate_write(ioc, msg, len) != len) { + if (nbd_negotiate_write(ioc, msg, len) < 0) { LOG("write failed (error message)"); ret = -EIO; } else { @@ -287,15 +286,15 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) } len = cpu_to_be32(name_len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { + if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { LOG("write failed (name length)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, name, name_len) != name_len) { + if (nbd_negotiate_write(ioc, name, name_len) < 0) { LOG("write failed (name buffer)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) { + if (nbd_negotiate_write(ioc, desc, desc_len) < 0) { LOG("write failed (description buffer)"); return -EINVAL; } @@ -309,7 +308,7 @@ static int 
nbd_negotiate_handle_list(NBDClient *client, uint32_t length) NBDExport *exp; if (length) { - if (nbd_negotiate_drop_sync(client->ioc, length) != length) { + if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { return -EIO; } return nbd_negotiate_send_rep_err(client->ioc, @@ -340,7 +339,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) LOG("Bad length received"); goto fail; } - if (nbd_negotiate_read(client->ioc, name, length) != length) { + if (nbd_negotiate_read(client->ioc, name, length) < 0) { LOG("read failed"); goto fail; } @@ -373,7 +372,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, TRACE("Setting up TLS"); ioc = client->ioc; if (length) { - if (nbd_negotiate_drop_sync(ioc, length) != length) { + if (nbd_negotiate_drop_sync(ioc, length) < 0) { return NULL; } nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, @@ -437,8 +436,7 @@ static int nbd_negotiate_options(NBDClient *client) ... Rest of request */ - if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) != - sizeof(flags)) { + if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) { LOG("read failed"); return -EIO; } @@ -464,8 +462,7 @@ static int nbd_negotiate_options(NBDClient *client) uint32_t clientflags, length; uint64_t magic; - if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) != - sizeof(magic)) { + if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) { LOG("read failed"); return -EINVAL; } @@ -476,14 +473,14 @@ static int nbd_negotiate_options(NBDClient *client) } if (nbd_negotiate_read(client->ioc, &clientflags, - sizeof(clientflags)) != sizeof(clientflags)) { + sizeof(clientflags)) < 0) + { LOG("read failed"); return -EINVAL; } clientflags = be32_to_cpu(clientflags); - if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) != - sizeof(length)) { + if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) { LOG("read failed"); return -EINVAL; } @@ -513,7 +510,7 @@ 
static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - if (nbd_negotiate_drop_sync(client->ioc, length) != length) { + if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -551,7 +548,7 @@ static int nbd_negotiate_options(NBDClient *client) return nbd_negotiate_handle_export_name(client, length); case NBD_OPT_STARTTLS: - if (nbd_negotiate_drop_sync(client->ioc, length) != length) { + if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { return -EIO; } if (client->tlscreds) { @@ -570,7 +567,7 @@ static int nbd_negotiate_options(NBDClient *client) } break; default: - if (nbd_negotiate_drop_sync(client->ioc, length) != length) { + if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -659,12 +656,12 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) TRACE("TLS cannot be enabled with oldstyle protocol"); goto fail; } - if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) { + if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) { LOG("write failed"); goto fail; } } else { - if (nbd_negotiate_write(client->ioc, buf, 18) != 18) { + if (nbd_negotiate_write(client->ioc, buf, 18) < 0) { LOG("write failed"); goto fail; } @@ -679,7 +676,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); len = client->no_zeroes ? 
10 : sizeof(buf) - 18; - if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) { + if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) { LOG("write failed"); goto fail; } @@ -697,16 +694,11 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) uint32_t magic; ssize_t ret; - ret = read_sync(ioc, buf, sizeof(buf)); + ret = read_sync(ioc, buf, sizeof(buf), NULL); if (ret < 0) { return ret; } - if (ret != sizeof(buf)) { - LOG("read failed"); - return -EINVAL; - } - /* Request [ 0 .. 3] magic (NBD_REQUEST_MAGIC) [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) @@ -737,7 +729,6 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; - ssize_t ret; reply->error = system_errno_to_nbd_errno(reply->error); @@ -754,16 +745,7 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) stl_be_p(buf + 4, reply->error); stq_be_p(buf + 8, reply->handle); - ret = write_sync(ioc, buf, sizeof(buf)); - if (ret < 0) { - return ret; - } - - if (ret != sizeof(buf)) { - LOG("writing to socket failed"); - return -EINVAL; - } - return 0; + return write_sync(ioc, buf, sizeof(buf), NULL); } #define MAX_NBD_REQUESTS 16 @@ -1066,8 +1048,8 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, qio_channel_set_cork(client->ioc, true); rc = nbd_send_reply(client->ioc, reply); if (rc >= 0) { - ret = write_sync(client->ioc, req->data, len); - if (ret != len) { + ret = write_sync(client->ioc, req->data, len, NULL); + if (ret < 0) { rc = -EIO; } } @@ -1141,7 +1123,7 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); - if (read_sync(client->ioc, req->data, request->len) != request->len) { + if (read_sync(client->ioc, req->data, request->len, NULL) < 0) { LOG("reading from socket failed"); rc = -EIO; goto out; @@ -1376,15 +1358,13 @@ static 
coroutine_fn void nbd_co_client_start(void *opaque) if (exp) { nbd_export_get(exp); - } - if (nbd_negotiate(data)) { - client_close(client); - goto out; + QTAILQ_INSERT_TAIL(&exp->clients, client, next); } qemu_co_mutex_init(&client->send_lock); - if (exp) { - QTAILQ_INSERT_TAIL(&exp->clients, client, next); + if (nbd_negotiate(data)) { + client_close(client); + goto out; } nbd_client_receive_next_request(client); diff --git a/pc-bios/linuxboot_dma.bin b/pc-bios/linuxboot_dma.bin index 218d3ab4a2..d176f62797 100644 Binary files a/pc-bios/linuxboot_dma.bin and b/pc-bios/linuxboot_dma.bin differ diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile index fa53d9e58e..a9a9e5e7eb 100644 --- a/pc-bios/optionrom/Makefile +++ b/pc-bios/optionrom/Makefile @@ -13,6 +13,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/optionrom) ifeq ($(lastword $(filter -O%, -O0 $(CFLAGS))),-O0) override CFLAGS += -O2 endif +override CFLAGS += -march=i486 # Drop -fstack-protector and the like QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS)) $(CFLAGS_NOPIE) -ffreestanding diff --git a/qemu-nbd.c b/qemu-nbd.c index b7ab86bfa7..651f85ecc1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -288,8 +288,9 @@ static void *nbd_client_thread(void *arg) goto out_socket; } - ret = nbd_init(fd, sioc, nbdflags, size); + ret = nbd_init(fd, sioc, nbdflags, size, &local_error); if (ret < 0) { + error_report_err(local_error); goto out_fd; } @@ -324,7 +325,7 @@ out: static int nbd_can_accept(void) { - return nb_fds < shared; + return state == RUNNING && nb_fds < shared; } static void nbd_export_closed(NBDExport *exp) diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index 826aee597b..647cff2829 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -272,25 +272,27 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + int32_t a20_mask; if (!cpu_paging_enabled(cs)) { 
/* paging is disabled */ return; } + a20_mask = x86_get_a20_mask(env); if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { if (env->cr[4] & CR4_LA57_MASK) { hwaddr pml5e_addr; - pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; - walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask); + pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & a20_mask; + walk_pml5e(list, cs->as, pml5e_addr, a20_mask); } else { hwaddr pml4e_addr; - pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; - walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask, + pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & a20_mask; + walk_pml4e(list, cs->as, pml4e_addr, a20_mask, 0xffffULL << 48); } } else @@ -298,16 +300,16 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, { hwaddr pdpe_addr; - pdpe_addr = (env->cr[3] & ~0x1f) & env->a20_mask; - walk_pdpe2(list, cs->as, pdpe_addr, env->a20_mask); + pdpe_addr = (env->cr[3] & ~0x1f) & a20_mask; + walk_pdpe2(list, cs->as, pdpe_addr, a20_mask); } } else { hwaddr pde_addr; bool pse; - pde_addr = (env->cr[3] & ~0xfff) & env->a20_mask; + pde_addr = (env->cr[3] & ~0xfff) & a20_mask; pse = !!(env->cr[4] & CR4_PSE_MASK); - walk_pde2(list, cs->as, pde_addr, env->a20_mask, pse); + walk_pde2(list, cs->as, pde_addr, a20_mask, pse); } } diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ffb5267162..b2b1d20cee 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3239,7 +3239,7 @@ static void x86_cpu_machine_done(Notifier *n, void *unused) cpu->smram = g_new(MemoryRegion, 1); memory_region_init_alias(cpu->smram, OBJECT(cpu), "smram", smram, 0, 1ull << 32); - memory_region_set_enabled(cpu->smram, false); + memory_region_set_enabled(cpu->smram, true); memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->smram, 1); } } @@ -3619,7 +3619,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY if (tcg_enabled()) { - AddressSpace *newas = g_new(AddressSpace, 1); 
+ AddressSpace *as_normal = address_space_init_shareable(cs->memory, + "cpu-memory"); + AddressSpace *as_smm = g_new(AddressSpace, 1); cpu->cpu_as_mem = g_new(MemoryRegion, 1); cpu->cpu_as_root = g_new(MemoryRegion, 1); @@ -3635,9 +3637,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) get_system_memory(), 0, ~0ull); memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->cpu_as_mem, 0); memory_region_set_enabled(cpu->cpu_as_mem, true); - address_space_init(newas, cpu->cpu_as_root, "CPU"); - cs->num_ases = 1; - cpu_address_space_init(cs, newas, 0); + address_space_init(as_smm, cpu->cpu_as_root, "CPU"); + + cs->num_ases = 2; + cpu_address_space_init(cs, as_normal, 0); + cpu_address_space_init(cs, as_smm, 1); /* ... SMRAM with higher priority, linked from /machine/smram. */ cpu->machine_done.notify = x86_cpu_machine_done; @@ -4053,6 +4057,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) #ifdef CONFIG_USER_ONLY cc->handle_mmu_fault = x86_cpu_handle_mmu_fault; #else + cc->asidx_from_attrs = x86_asidx_from_attrs; cc->get_memory_mapping = x86_cpu_get_memory_mapping; cc->get_phys_page_debug = x86_cpu_get_phys_page_debug; cc->write_elf64_note = x86_cpu_write_elf64_note; @@ -4063,11 +4068,11 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) #endif cc->gdb_arch_name = x86_gdb_arch_name; #ifdef TARGET_X86_64 - cc->gdb_core_xml_file = "i386-64bit-core.xml"; - cc->gdb_num_core_regs = 40; + cc->gdb_core_xml_file = "i386-64bit.xml"; + cc->gdb_num_core_regs = 57; #else - cc->gdb_core_xml_file = "i386-32bit-core.xml"; - cc->gdb_num_core_regs = 32; + cc->gdb_core_xml_file = "i386-32bit.xml"; + cc->gdb_num_core_regs = 41; #endif #ifndef CONFIG_USER_ONLY cc->debug_excp_handler = breakpoint_handler; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index cfe825f0a4..de0551f775 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1451,6 +1451,16 @@ int x86_cpu_handle_mmu_fault(CPUState *cpu, vaddr addr, void 
x86_cpu_set_a20(X86CPU *cpu, int a20_state); #ifndef CONFIG_USER_ONLY +static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs) +{ + return !!attrs.secure; +} + +static inline AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs) +{ + return cpu_get_address_space(cs, cpu_asidx_from_attrs(cs, attrs)); +} + uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr); uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr); uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr); @@ -1625,6 +1635,15 @@ static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 }); } +static inline int32_t x86_get_a20_mask(CPUX86State *env) +{ + if (env->hflags & HF_SMM_MASK) { + return -1; + } else { + return env->a20_mask; + } +} + /* fpu_helper.c */ void cpu_set_mxcsr(CPUX86State *env, uint32_t val); void cpu_set_fpuc(CPUX86State *env, uint16_t val); @@ -1644,7 +1663,6 @@ void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -void cpu_smm_update(X86CPU *cpu); /* apic.c */ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); diff --git a/target/i386/helper.c b/target/i386/helper.c index ee7eff2f6f..ef0505949a 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -724,6 +724,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; uint64_t ptep, pte; + int32_t a20_mask; target_ulong pde_addr, pte_addr; int error_code = 0; int is_dirty, prot, page_size, is_write, is_user; @@ -739,6 +740,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, #endif is_write = is_write1 & 1; + a20_mask = x86_get_a20_mask(env); if (!(env->cr[0] & CR0_PG_MASK)) { pte = addr; #ifdef TARGET_X86_64 @@ -777,7 +779,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, if (la57) { pml5e_addr = ((env->cr[3] & ~0xfff) + - (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 48) & 
0x1ff) << 3)) & a20_mask; pml5e = x86_ldq_phys(cs, pml5e_addr); if (!(pml5e & PG_PRESENT_MASK)) { goto do_fault; @@ -796,7 +798,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, } pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + - (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 39) & 0x1ff) << 3)) & a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { goto do_fault; @@ -810,7 +812,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, } ptep &= pml4e ^ PG_NX_MASK; pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & - env->a20_mask; + a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { goto do_fault; @@ -835,7 +837,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, { /* XXX: load them when cr3 is loaded ? */ pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & - env->a20_mask; + a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { goto do_fault; @@ -848,7 +850,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, } pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) & - env->a20_mask; + a20_mask; pde = x86_ldq_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) { goto do_fault; @@ -870,7 +872,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, x86_stl_phys_notdirty(cs, pde_addr, pde); } pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) & - env->a20_mask; + a20_mask; pte = x86_ldq_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { goto do_fault; @@ -886,7 +888,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, /* page directory entry */ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & - env->a20_mask; + a20_mask; pde = x86_ldl_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) { goto do_fault; @@ -913,7 +915,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, /* page directory entry */ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & - 
env->a20_mask; + a20_mask; pte = x86_ldl_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { goto do_fault; @@ -992,7 +994,7 @@ do_check_protect_pse36: } do_mapping: - pte = pte & env->a20_mask; + pte = pte & a20_mask; /* align to page_size */ pte &= PG_ADDRESS_MASK & ~(page_size - 1); @@ -1039,11 +1041,13 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) CPUX86State *env = &cpu->env; target_ulong pde_addr, pte_addr; uint64_t pte; + int32_t a20_mask; uint32_t page_offset; int page_size; + a20_mask = x86_get_a20_mask(env); if (!(env->cr[0] & CR0_PG_MASK)) { - pte = addr & env->a20_mask; + pte = addr & a20_mask; page_size = 4096; } else if (env->cr[4] & CR4_PAE_MASK) { target_ulong pdpe_addr; @@ -1064,7 +1068,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) if (la57) { pml5e_addr = ((env->cr[3] & ~0xfff) + - (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 48) & 0x1ff) << 3)) & a20_mask; pml5e = x86_ldq_phys(cs, pml5e_addr); if (!(pml5e & PG_PRESENT_MASK)) { return -1; @@ -1074,13 +1078,13 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + - (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 39) & 0x1ff) << 3)) & a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { return -1; } pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + - (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 30) & 0x1ff) << 3)) & a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { return -1; @@ -1095,14 +1099,14 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) #endif { pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & - env->a20_mask; + a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) return -1; } pde_addr = ((pdpe & PG_ADDRESS_MASK) + - (((addr >> 21) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 21) & 0x1ff) << 3)) & a20_mask; pde = x86_ldq_phys(cs, pde_addr); if (!(pde & 
PG_PRESENT_MASK)) { return -1; @@ -1114,7 +1118,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } else { /* 4 KB page */ pte_addr = ((pde & PG_ADDRESS_MASK) + - (((addr >> 12) & 0x1ff) << 3)) & env->a20_mask; + (((addr >> 12) & 0x1ff) << 3)) & a20_mask; page_size = 4096; pte = x86_ldq_phys(cs, pte_addr); } @@ -1125,7 +1129,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) uint32_t pde; /* page directory entry */ - pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & a20_mask; pde = x86_ldl_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) return -1; @@ -1134,14 +1138,14 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) page_size = 4096 * 1024; } else { /* page directory entry */ - pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & a20_mask; pte = x86_ldl_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { return -1; } page_size = 4096; } - pte = pte & env->a20_mask; + pte = pte & a20_mask; } #ifdef TARGET_X86_64 @@ -1399,89 +1403,89 @@ uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - return address_space_ldub(cs->as, addr, - cpu_get_mem_attrs(env), - NULL); + return address_space_ldub(as, addr, attrs, NULL); } uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - return address_space_lduw(cs->as, addr, - cpu_get_mem_attrs(env), - NULL); + return address_space_lduw(as, addr, attrs, NULL); } uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace 
*as = cpu_addressspace(cs, attrs); - return address_space_ldl(cs->as, addr, - cpu_get_mem_attrs(env), - NULL); + return address_space_ldl(as, addr, attrs, NULL); } uint64_t x86_ldq_phys(CPUState *cs, hwaddr addr) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - return address_space_ldq(cs->as, addr, - cpu_get_mem_attrs(env), - NULL); + return address_space_ldq(as, addr, attrs, NULL); } void x86_stb_phys(CPUState *cs, hwaddr addr, uint8_t val) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - address_space_stb(cs->as, addr, val, - cpu_get_mem_attrs(env), - NULL); + address_space_stb(as, addr, val, attrs, NULL); } void x86_stl_phys_notdirty(CPUState *cs, hwaddr addr, uint32_t val) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - address_space_stl_notdirty(cs->as, addr, val, - cpu_get_mem_attrs(env), - NULL); + address_space_stl_notdirty(as, addr, val, attrs, NULL); } void x86_stw_phys(CPUState *cs, hwaddr addr, uint32_t val) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - address_space_stw(cs->as, addr, val, - cpu_get_mem_attrs(env), - NULL); + address_space_stw(as, addr, val, attrs, NULL); } void x86_stl_phys(CPUState *cs, hwaddr addr, uint32_t val) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - address_space_stl(cs->as, addr, val, - cpu_get_mem_attrs(env), - NULL); + address_space_stl(as, addr, val, attrs, NULL); } void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + 
MemTxAttrs attrs = cpu_get_mem_attrs(env); + AddressSpace *as = cpu_addressspace(cs, attrs); - address_space_stq(cs->as, addr, val, - cpu_get_mem_attrs(env), - NULL); + address_space_stq(as, addr, val, attrs, NULL); } #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 49b6115eae..ee36502789 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -43,6 +43,7 @@ #include "standard-headers/asm-x86/hyperv.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" +#include "hw/pci/msix.h" #include "migration/blocker.h" #include "exec/memattrs.h" #include "trace.h" @@ -1254,7 +1255,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } - if (kvm_check_extension(s, KVM_CAP_X86_SMM)) { + if (kvm_check_extension(s, KVM_CAP_X86_SMM) && + object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE) && + pc_machine_is_smm_enabled(PC_MACHINE(ms))) { smram_machine_done.notify = register_smram_listener; qemu_add_machine_init_done_notifier(&smram_machine_done); } @@ -1300,18 +1303,14 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) lhs->selector = rhs->selector; lhs->base = rhs->base; lhs->limit = rhs->limit; - if (rhs->unusable) { - lhs->flags = 0; - } else { - lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | - (rhs->present * DESC_P_MASK) | - (rhs->dpl << DESC_DPL_SHIFT) | - (rhs->db << DESC_B_SHIFT) | - (rhs->s * DESC_S_MASK) | - (rhs->l << DESC_L_SHIFT) | - (rhs->g * DESC_G_MASK) | - (rhs->avl * DESC_AVL_MASK); - } + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + ((rhs->present && !rhs->unusable) * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); } static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) @@ -3510,12 +3509,17 @@ static void kvm_update_msi_routes_all(void *private, bool global, int cnt = 0; MSIRouteEntry *entry; MSIMessage msg; + PCIDevice *dev; + /* TODO: explicit route update */ 
QLIST_FOREACH(entry, &msi_route_list, list) { cnt++; - msg = pci_get_msi_message(entry->dev, entry->vector); - kvm_irqchip_update_msi_route(kvm_state, entry->virq, - msg, entry->dev); + dev = entry->dev; + if (!msix_enabled(dev) && !msi_enabled(dev)) { + continue; + } + msg = pci_get_msi_message(dev, entry->vector); + kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); } kvm_irqchip_commit_routes(kvm_state); trace_kvm_x86_update_msi_routes(cnt); diff --git a/target/i386/machine.c b/target/i386/machine.c index 3cb272948e..8c7a822e9f 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -274,10 +274,6 @@ static int cpu_post_load(void *opaque, int version_id) cpu_x86_update_dr7(env, dr7); } tlb_flush(cs); - - if (tcg_enabled()) { - cpu_smm_update(cpu); - } return 0; } diff --git a/target/i386/smm_helper.c b/target/i386/smm_helper.c index f051a77c4a..90621e5977 100644 --- a/target/i386/smm_helper.c +++ b/target/i386/smm_helper.c @@ -43,19 +43,6 @@ void helper_rsm(CPUX86State *env) #define SMM_REVISION_ID 0x00020000 #endif -/* Called with iothread lock taken */ -void cpu_smm_update(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - bool smm_enabled = (env->hflags & HF_SMM_MASK); - - g_assert(qemu_mutex_iothread_locked()); - - if (cpu->smram) { - memory_region_set_enabled(cpu->smram, smm_enabled); - } -} - void do_smm_enter(X86CPU *cpu) { CPUX86State *env = &cpu->env; @@ -73,7 +60,6 @@ void do_smm_enter(X86CPU *cpu) } else { env->hflags2 |= HF2_NMI_MASK; } - cpu_smm_update(cpu); sm_state = env->smbase + 0x8000; @@ -338,10 +324,6 @@ void helper_rsm(CPUX86State *env) env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; env->hflags &= ~HF_SMM_MASK; - qemu_mutex_lock_iothread(); - cpu_smm_update(cpu); - qemu_mutex_unlock_iothread(); - qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP); } diff --git a/target/i386/translate.c b/target/i386/translate.c index 674ec96d5a..ed3b896db4 100644 --- 
a/target/i386/translate.c +++ b/target/i386/translate.c @@ -7939,14 +7939,26 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (b & 2) { + if (s->tb->cflags & CF_USE_ICOUNT) { + gen_io_start(); + } gen_op_mov_v_reg(ot, cpu_T0, rm); gen_helper_write_crN(cpu_env, tcg_const_i32(reg), cpu_T0); + if (s->tb->cflags & CF_USE_ICOUNT) { + gen_io_end(); + } gen_jmp_im(s->pc - s->cs_base); gen_eob(s); } else { + if (s->tb->cflags & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg)); gen_op_mov_reg_v(ot, rm, cpu_T0); + if (s->tb->cflags & CF_USE_ICOUNT) { + gen_io_end(); + } } break; default: diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out index 0c13888ba1..a24c6bfece 100644 --- a/tests/qemu-iotests/083.out +++ b/tests/qemu-iotests/083.out @@ -69,10 +69,12 @@ read failed: Input/output error === Check disconnect 4 reply === +read failed read failed: Input/output error === Check disconnect 8 reply === +read failed read failed: Input/output error === Check disconnect before data === diff --git a/tests/rtc-test.c b/tests/rtc-test.c index a086efd120..e78f701afb 100644 --- a/tests/rtc-test.c +++ b/tests/rtc-test.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "libqtest.h" +#include "qemu/timer.h" #include "hw/timer/mc146818rtc_regs.h" static uint8_t base = 0x70; @@ -542,6 +543,52 @@ static void register_b_set_flag(void) g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20); } +#define RTC_PERIOD_CODE1 13 /* 8 Hz */ +#define RTC_PERIOD_CODE2 15 /* 2 Hz */ + +#define RTC_PERIOD_TEST_NR 50 + +static uint64_t wait_periodic_interrupt(uint64_t real_time) +{ + while (!get_irq(RTC_ISA_IRQ)) { + real_time = clock_step_next(); + } + + g_assert((cmos_read(RTC_REG_C) & REG_C_PF) != 0); + return real_time; +} + +static void periodic_timer(void) +{ + int i; + uint64_t period_clocks, period_time, start_time, real_time; + + /* disable all interrupts. 
*/ + cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & + ~(REG_B_PIE | REG_B_AIE | REG_B_UIE)); + cmos_write(RTC_REG_A, RTC_PERIOD_CODE1); + /* enable periodic interrupt after properly configure the period. */ + cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) | REG_B_PIE); + + start_time = real_time = clock_step_next(); + + for (i = 0; i < RTC_PERIOD_TEST_NR; i++) { + cmos_write(RTC_REG_A, RTC_PERIOD_CODE1); + real_time = wait_periodic_interrupt(real_time); + cmos_write(RTC_REG_A, RTC_PERIOD_CODE2); + real_time = wait_periodic_interrupt(real_time); + } + + period_clocks = periodic_period_to_clock(RTC_PERIOD_CODE1) + + periodic_period_to_clock(RTC_PERIOD_CODE2); + period_clocks *= RTC_PERIOD_TEST_NR; + period_time = periodic_clock_to_ns(period_clocks); + + real_time -= start_time; + g_assert_cmpint(ABS((int64_t)(real_time - period_time)), <=, + NANOSECONDS_PER_SECOND * 0.5); +} + int main(int argc, char **argv) { QTestState *s = NULL; @@ -564,6 +611,8 @@ int main(int argc, char **argv) qtest_add_func("/rtc/set-year/1980", set_year_1980); qtest_add_func("/rtc/misc/register_b_set_flag", register_b_set_flag); qtest_add_func("/rtc/misc/fuzz-registers", fuzz_registers); + qtest_add_func("/rtc/periodic/interrupt", periodic_timer); + ret = g_test_run(); if (s) { diff --git a/trace-events b/trace-events index d7a4d94168..b496be94d4 100644 --- a/trace-events +++ b/trace-events @@ -62,8 +62,9 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" kvm_irqchip_commit_routes(void) "" -kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d" +kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" +kvm_irqchip_release_virq(int virq) "virq %d" # TCG 
related tracing (mostly disabled by default) # cpu-exec.c diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 7e28c161b2..048d40d9de 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -182,7 +182,9 @@ void qemu_set_cloexec(int fd) { int f; f = fcntl(fd, F_GETFD); - fcntl(fd, F_SETFD, f | FD_CLOEXEC); + assert(f != -1); + f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); + assert(f != -1); } /* diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index b39ae74fe0..82290cb687 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -845,6 +845,8 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, { struct sockaddr_un un; int sock, fd; + char *pathbuf = NULL; + const char *path; sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); if (sock < 0) { @@ -852,20 +854,22 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, return -1; } - memset(&un, 0, sizeof(un)); - un.sun_family = AF_UNIX; - if (saddr->path && strlen(saddr->path)) { - snprintf(un.sun_path, sizeof(un.sun_path), "%s", saddr->path); + if (saddr->path && saddr->path[0]) { + path = saddr->path; } else { const char *tmpdir = getenv("TMPDIR"); tmpdir = tmpdir ? tmpdir : "/tmp"; - if (snprintf(un.sun_path, sizeof(un.sun_path), "%s/qemu-socket-XXXXXX", - tmpdir) >= sizeof(un.sun_path)) { - error_setg_errno(errp, errno, - "TMPDIR environment variable (%s) too large", tmpdir); - goto err; - } + path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX", tmpdir); + } + if (strlen(path) > sizeof(un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(un.sun_path)); + goto err; + } + + if (pathbuf != NULL) { /* * This dummy fd usage silences the mktemp() unsecure warning. * Using mkstemp() doesn't make things more secure here @@ -873,24 +877,25 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, * to unlink first and thus re-open the race window. The * worst case possible is bind() failing, i.e. 
a DoS attack. */ - fd = mkstemp(un.sun_path); + fd = mkstemp(pathbuf); if (fd < 0) { error_setg_errno(errp, errno, - "Failed to make a temporary socket name in %s", tmpdir); + "Failed to make a temporary socket %s", pathbuf); goto err; } close(fd); - if (update_addr) { - g_free(saddr->path); - saddr->path = g_strdup(un.sun_path); - } } - if (unlink(un.sun_path) < 0 && errno != ENOENT) { + if (unlink(path) < 0 && errno != ENOENT) { error_setg_errno(errp, errno, - "Failed to unlink socket %s", un.sun_path); + "Failed to unlink socket %s", path); goto err; } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + strncpy(un.sun_path, path, sizeof(un.sun_path)); + if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) { error_setg_errno(errp, errno, "Failed to bind socket to %s", un.sun_path); goto err; @@ -900,9 +905,16 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, goto err; } + if (update_addr && pathbuf) { + g_free(saddr->path); + saddr->path = pathbuf; + } else { + g_free(pathbuf); + } return sock; err: + g_free(pathbuf); closesocket(sock); return -1; } @@ -932,9 +944,16 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, qemu_set_nonblock(sock); } + if (strlen(saddr->path) > sizeof(un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", saddr->path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(un.sun_path)); + goto err; + } + memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", saddr->path); + strncpy(un.sun_path, saddr->path, sizeof(un.sun_path)); /* connect to peer */ do { @@ -956,13 +975,18 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, } if (rc < 0) { - error_setg_errno(errp, -rc, "Failed to connect socket"); - close(sock); - sock = -1; + error_setg_errno(errp, -rc, "Failed to connect socket %s", + saddr->path); + goto err; } g_free(connect_state); return sock; + + err: + close(sock); + g_free(connect_state); + return -1; } #else