vulkan/wsi/wayland: Pace frames with commit-timing-v1

Instead of using frame callbacks - which may stop firing if our surface is
occluded - use the new commit-timing-v1 protocol in combination with the
presentation feedback protocol.

If the required protocols are unavailable, or the environment variable
MESA_VK_WSI_DEBUG contains "nowlts", we fall back to frame callback
based pacing behaviour.

Signed-off-by: Derek Foreman <derek.foreman@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26150>
This commit is contained in:
Derek Foreman 2023-12-04 12:47:06 -06:00 committed by Marge Bot
parent d052b0201e
commit c26ab1aee1
6 changed files with 250 additions and 10 deletions

View File

@ -43,6 +43,7 @@ libwayland_drm = static_library(
wp_dir = dep_wl_protocols.get_variable(pkgconfig : 'pkgdatadir', internal : 'pkgdatadir')
wp_protos = {
'fifo-v1': 'staging/fifo/fifo-v1.xml',
'commit-timing-v1': 'staging/commit-timing/commit-timing-v1.xml',
'linux-dmabuf-unstable-v1': 'unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml',
'presentation-time': 'stable/presentation-time/presentation-time.xml',
'tearing-control-v1': 'staging/tearing-control/tearing-control-v1.xml',

View File

@ -16,6 +16,7 @@ endif
if with_platform_wayland
files_vulkan_wsi += files('wsi_common_wayland.c')
files_vulkan_wsi += wp_files['fifo-v1']
files_vulkan_wsi += wp_files['commit-timing-v1']
files_vulkan_wsi += wp_files['linux-dmabuf-unstable-v1']
files_vulkan_wsi += wp_files['presentation-time']
files_vulkan_wsi += wp_files['tearing-control-v1']

View File

@ -55,6 +55,7 @@ static const struct debug_control debug_control[] = {
{ "noshm", WSI_DEBUG_NOSHM },
{ "linear", WSI_DEBUG_LINEAR },
{ "dxgi", WSI_DEBUG_DXGI },
{ "nowlts", WSI_DEBUG_NOWLTS },
{ NULL, },
};
@ -86,6 +87,7 @@ wsi_device_init(struct wsi_device *wsi,
wsi->sw = device_options->sw_device || (WSI_DEBUG & WSI_DEBUG_SW);
wsi->wants_linear = (WSI_DEBUG & WSI_DEBUG_LINEAR) != 0;
wsi->x11.extra_xwayland_image = device_options->extra_xwayland_image;
wsi->wayland.disable_timestamps = (WSI_DEBUG & WSI_DEBUG_NOWLTS) != 0;
#define WSI_GET_CB(func) \
PFN_vk##func func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
WSI_GET_CB(GetPhysicalDeviceExternalSemaphoreProperties);

View File

@ -203,6 +203,11 @@ struct wsi_device {
* This requires VK_KHR_timeline_semaphore. */
bool khr_present_wait;
struct {
/* Don't use the commit-timing protocol for pacing */
bool disable_timestamps;
} wayland;
/*
* This sets the ownership for a WSI memory object:
*

View File

@ -40,6 +40,7 @@ struct wsi_swapchain;
#define WSI_DEBUG_NOSHM (1ull << 2)
#define WSI_DEBUG_LINEAR (1ull << 3)
#define WSI_DEBUG_DXGI (1ull << 4)
#define WSI_DEBUG_NOWLTS (1ull << 5)
extern uint64_t WSI_DEBUG;

View File

@ -42,6 +42,7 @@
#include "wsi_common_entrypoints.h"
#include "wsi_common_private.h"
#include "fifo-v1-client-protocol.h"
#include "commit-timing-v1-client-protocol.h"
#include "linux-dmabuf-unstable-v1-client-protocol.h"
#include "presentation-time-client-protocol.h"
#include "linux-drm-syncobj-v1-client-protocol.h"
@ -116,6 +117,8 @@ struct wsi_wl_display {
uint32_t wp_presentation_version;
struct wp_fifo_manager_v1 *fifo_manager;
struct wp_commit_timing_manager_v1 *commit_timing_manager;
bool no_timestamps;
struct wsi_wayland *wsi_wl;
@ -173,6 +176,12 @@ struct wsi_wl_surface {
uint64_t presentation_track_id;
} analytics;
uint64_t last_target_time;
uint64_t displayed_time;
bool valid_refresh_nsec;
unsigned int refresh_nsec;
uint64_t display_time_error;
uint64_t display_time_correction;
struct zwp_linux_dmabuf_feedback_v1 *wl_dmabuf_feedback;
struct dmabuf_feedback dmabuf_feedback, pending_dmabuf_feedback;
@ -185,6 +194,7 @@ struct wsi_wl_swapchain {
struct wsi_wl_surface *wsi_wl_surface;
struct wp_tearing_control_v1 *tearing_control;
struct wp_fifo_v1 *fifo;
struct wp_commit_timer_v1 *commit_timer;
struct wl_callback *frame;
@ -255,6 +265,40 @@ stringify_wayland_id(uint32_t id)
return out;
}
/* Pick a target time that is phase-locked to 'base'.
 *
 * The result is 'base' plus a whole number of 'period's and is never
 * earlier than 'from' (it may equal 'from' when 'from' already sits
 * exactly on a period boundary). When there is no time base yet
 * (base == 0) there is nothing to lock to and 'from' is returned as-is.
 *
 * 'from' must be non-zero.
 */
static uint64_t
next_phase_locked_time(uint64_t base, uint64_t period, uint64_t from)
{
   assert(from != 0);

   if (base == 0)
      return from;

   /* A zero period is replaced by a fixed default step. */
   const uint64_t step = period ? period : 16666666;

   /* The time base can lie in the future (this can happen when it comes
    * from presentation feedback events). In that case aim for the first
    * period boundary after the base.
    */
   if (base >= from)
      return base + step;

   /* Count periods rounding up, so that imprecise feedback times just
    * after one refresh and just before the following one don't collapse
    * onto the same cycle count.
    */
   const uint64_t elapsed = from - base;
   const uint64_t cycles = (elapsed + step - 1) / step;

   return base + cycles * step;
}
static struct wsi_wl_format *
wsi_wl_display_add_vk_format(struct wsi_wl_display *display,
struct u_vector *formats,
@ -873,6 +917,10 @@ registry_handle_global(void *data, struct wl_registry *registry,
} else if (strcmp(interface, wp_fifo_manager_v1_interface.name) == 0) {
display->fifo_manager =
wl_registry_bind(registry, name, &wp_fifo_manager_v1_interface, 1);
} else if (!display->no_timestamps &&
strcmp(interface, wp_commit_timing_manager_v1_interface.name) == 0) {
display->commit_timing_manager =
wl_registry_bind(registry, name, &wp_commit_timing_manager_v1_interface, 1);
}
}
@ -903,6 +951,8 @@ wsi_wl_display_finish(struct wsi_wl_display *display)
wp_presentation_destroy(display->wp_presentation_notwrapped);
if (display->fifo_manager)
wp_fifo_manager_v1_destroy(display->fifo_manager);
if (display->commit_timing_manager)
wp_commit_timing_manager_v1_destroy(display->commit_timing_manager);
if (display->tearing_control_manager)
wp_tearing_control_manager_v1_destroy(display->tearing_control_manager);
if (display->wl_display_wrapper)
@ -941,6 +991,8 @@ wsi_wl_display_init(struct wsi_wayland *wsi_wl,
goto fail;
}
display->no_timestamps = wsi_wl->wsi->wayland.disable_timestamps;
wl_proxy_set_queue((struct wl_proxy *) display->wl_display_wrapper,
display->queue);
@ -1724,6 +1776,9 @@ static VkResult wsi_wl_surface_init(struct wsi_wl_surface *wsi_wl_surface,
}
wsi_wl_surface_analytics_init(wsi_wl_surface, pAllocator);
wsi_wl_surface->valid_refresh_nsec = false;
wsi_wl_surface->refresh_nsec = 0;
return VK_SUCCESS;
fail:
@ -1777,6 +1832,8 @@ struct wsi_wl_present_id {
const VkAllocationCallbacks *alloc;
struct wsi_wl_swapchain *chain;
int buffer_id;
uint64_t target_time;
uint64_t correction;
struct wl_list link;
};
@ -1943,6 +2000,37 @@ wsi_wl_swapchain_wait_for_present(struct wsi_swapchain *wsi_chain,
}
}
/* Run one zero-timeout dispatch of the swapchain's present_ids queue,
 * unless present_ids.dispatch_in_progress says a dispatch is already
 * under way.
 *
 * Returns the result of wl_display_dispatch_queue_timeout(), or 0 when
 * the dispatch was skipped.
 */
static int
wsi_wl_swapchain_ensure_dispatch(struct wsi_wl_swapchain *chain)
{
   struct wl_display *wl_dpy = chain->wsi_wl_surface->display->wl_display;
   struct timespec no_wait = {0, 0};
   int dispatched;

   /* Claim the dispatch under the lock; back off if it is already taken. */
   mtx_lock(&chain->present_ids.lock);
   if (chain->present_ids.dispatch_in_progress) {
      mtx_unlock(&chain->present_ids.lock);
      return 0;
   }
   chain->present_ids.dispatch_in_progress = true;
   mtx_unlock(&chain->present_ids.lock);

   /* dispatch_pending won't read any events in the pipe, so use a
    * regular dispatch with an instant timeout instead.
    */
   dispatched = wl_display_dispatch_queue_timeout(wl_dpy,
                                                  chain->present_ids.queue,
                                                  &no_wait);

   /* Release the claim and wake anyone waiting on list_advanced. */
   mtx_lock(&chain->present_ids.lock);
   u_cnd_monotonic_broadcast(&chain->present_ids.list_advanced);
   chain->present_ids.dispatch_in_progress = false;
   mtx_unlock(&chain->present_ids.lock);

   return dispatched;
}
static VkResult
wsi_wl_swapchain_acquire_next_image_explicit(struct wsi_swapchain *wsi_chain,
const VkAcquireNextImageInfoKHR *info,
@ -2005,6 +2093,15 @@ wsi_wl_swapchain_acquire_next_image_implicit(struct wsi_swapchain *wsi_chain,
timespec_add(&end_time, &rel_timeout, &start_time);
while (1) {
/* If we can use timestamps, make sure the queue that carries the
* feedback events gets dispatched, so that we eventually get a refresh
* rate and a vsync time to phase lock to. We don't need to wait for
* that now.
*/
if (chain->commit_timer) {
if (wsi_wl_swapchain_ensure_dispatch(chain) == -1)
return VK_ERROR_OUT_OF_DATE_KHR;
}
/* Try to find a free image. */
for (uint32_t i = 0; i < chain->base.image_count; i++) {
if (!chain->images[i].busy) {
@ -2043,6 +2140,7 @@ wsi_wl_presentation_update_present_id(struct wsi_wl_present_id *id)
if (id->present_id > id->chain->present_ids.max_completed)
id->chain->present_ids.max_completed = id->present_id;
id->chain->wsi_wl_surface->display_time_correction -= id->correction;
wl_list_remove(&id->link);
mtx_unlock(&id->chain->present_ids.lock);
vk_free(id->alloc, id);
@ -2094,11 +2192,31 @@ presentation_handle_presented(void *data,
MESA_TRACE_FUNC_FLOW(&id->flow_id);
struct wsi_wl_swapchain *chain = id->chain;
struct wsi_wl_surface *surface = chain->wsi_wl_surface;
uint64_t target_time = id->target_time;
surface->refresh_nsec = refresh;
presentation_ts.tv_sec = ((uint64_t)tv_sec_hi << 32) + tv_sec_lo;
presentation_ts.tv_nsec = tv_nsec;
presentation_time = timespec_to_nsec(&presentation_ts);
trace_present(id, presentation_time);
if (!surface->valid_refresh_nsec) {
surface->valid_refresh_nsec = true;
surface->last_target_time = presentation_time;
target_time = presentation_time;
}
if (presentation_time > surface->displayed_time)
surface->displayed_time = presentation_time;
if (target_time && presentation_time > target_time)
surface->display_time_error = presentation_time - target_time;
else
surface->display_time_error = 0;
wsi_wl_presentation_update_present_id(id);
wp_presentation_feedback_destroy(feedback);
}
@ -2110,6 +2228,16 @@ presentation_handle_discarded(void *data,
struct wsi_wl_present_id *id = data;
MESA_TRACE_FUNC_FLOW(&id->flow_id);
struct wsi_wl_swapchain *chain = id->chain;
struct wsi_wl_surface *surface = chain->wsi_wl_surface;
if (!surface->valid_refresh_nsec) {
/* We've started occluded, so make up some safe values to throttle us */
surface->displayed_time = os_time_get_nano();
surface->last_target_time = surface->displayed_time;
surface->refresh_nsec = 16666666;
surface->valid_refresh_nsec = true;
}
wsi_wl_presentation_update_present_id(id);
wp_presentation_feedback_destroy(feedback);
@ -2149,6 +2277,71 @@ static const struct wl_callback_listener frame_listener = {
frame_handle_done,
};
/* Choose a target time for the next commit and hand it to the
 * swapchain's commit timer.
 *
 * chain:      swapchain whose commit_timer receives the timestamp
 * timestamp:  out; the chosen target time. Written only when true is
 *             returned.
 * correction: out; how far the target was pushed forward to make up for
 *             missed frame opportunities (a whole number of refresh
 *             periods, possibly 0). Written only when true is returned.
 *
 * Returns false, without touching the outputs or the commit timer, when
 * the surface has no valid refresh information yet or the refresh
 * period is 0. Returns true once a timestamp has been set.
 */
static bool
set_timestamp(struct wsi_wl_swapchain *chain,
              uint64_t *timestamp,
              uint64_t *correction)
{
   struct wsi_wl_surface *surface = chain->wsi_wl_surface;
   uint64_t target;
   struct timespec target_ts;
   uint64_t refresh;
   uint64_t displayed_time;
   /* Kept in 64 bits: this is a difference of 64-bit nanosecond values
    * and a narrower signed type would mangle errors longer than ~2.1 s.
    */
   uint64_t error = 0;

   if (!surface->valid_refresh_nsec)
      return false;

   displayed_time = surface->displayed_time;
   refresh = surface->refresh_nsec;

   /* If refresh is 0, presentation feedback has informed us we have no
    * fixed refresh cycle. In that case we can't generate sensible
    * timestamps at all, so bail out.
    */
   if (!refresh)
      return false;

   /* We assume we're being fed at the display's refresh rate, but
    * if that doesn't happen our timestamps fall into the past.
    *
    * This would result in an offscreen surface being unthrottled until
    * it "catches up" on missed frames. Instead, correct for missed
    * frame opportunities by jumping forward if our display time
    * didn't match our target time.
    *
    * Since we might have a few frames in flight, we need to keep a
    * running tally of how much correction we're applying and remove
    * it as corrected frames are retired.
    */
   if (surface->display_time_error > surface->display_time_correction)
      error = surface->display_time_error - surface->display_time_correction;

   /* Only jump by whole refresh periods; this is 0 when there is no
    * outstanding error or the error is shorter than one period.
    */
   *correction = (error / refresh) * refresh;
   target = surface->last_target_time + *correction;
   surface->display_time_correction += *correction;

   target = next_phase_locked_time(displayed_time,
                                   refresh,
                                   target);

   /* Take back 500 us as a safety margin, to ensure we don't miss our
    * target due to round-off error.
    */
   timespec_from_nsec(&target_ts, target - 500000);
   wp_commit_timer_v1_set_timestamp(chain->commit_timer,
                                    (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
                                    target_ts.tv_nsec);

   surface->last_target_time = target;
   *timestamp = target;
   return true;
}
static VkResult
wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
uint32_t image_index,
@ -2156,7 +2349,9 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
const VkPresentRegionKHR *damage)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
bool timestamped = false;
bool queue_dispatched = false;
bool need_legacy_throttling = true;
uint64_t flow_id = chain->images[image_index].flow_id;
MESA_TRACE_FUNC_FLOW(&flow_id);
@ -2232,16 +2427,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
wl_surface_damage(wsi_wl_surface->surface, 0, 0, INT32_MAX, INT32_MAX);
}
if (mode_fifo && !chain->fifo) {
chain->frame = wl_surface_frame(wsi_wl_surface->surface);
wl_callback_add_listener(chain->frame, &frame_listener, chain);
chain->legacy_fifo_ready = false;
} else {
/* If we present MAILBOX, any subsequent presentation in FIFO can replace this image. */
chain->legacy_fifo_ready = true;
}
if (present_id > 0 || util_perfetto_is_tracing_enabled()) {
if (present_id > 0 || (mode_fifo && chain->commit_timer) ||
util_perfetto_is_tracing_enabled()) {
struct wsi_wl_present_id *id =
vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t),
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@ -2254,6 +2441,12 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
id->submission_time = os_time_get_nano();
if (mode_fifo && chain->fifo && chain->commit_timer) {
timestamped = set_timestamp(chain, &id->target_time, &id->correction);
if (timestamped || !wsi_wl_surface->valid_refresh_nsec)
need_legacy_throttling = false;
}
mtx_lock(&chain->present_ids.lock);
if (chain->present_ids.wp_presentation) {
@ -2273,8 +2466,34 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
chain->images[image_index].busy = true;
if (mode_fifo && need_legacy_throttling) {
chain->frame = wl_surface_frame(wsi_wl_surface->surface);
wl_callback_add_listener(chain->frame, &frame_listener, chain);
chain->legacy_fifo_ready = false;
} else {
/* If we present MAILBOX, any subsequent presentation in FIFO can replace this image. */
chain->legacy_fifo_ready = true;
}
if (mode_fifo && chain->fifo) {
wp_fifo_v1_set_barrier(chain->fifo);
/* If our surface is occluded and we're using vkWaitForPresentKHR,
* we can end up waiting forever. The FIFO condition and the time
* constraint are met, but the image hasn't been presented because
* we're occluded - but the image isn't discarded because there
* are no further content updates for the compositor to process.
*
* This extra commit gives us the second content update to move
* things along. If we're occluded the FIFO constraint is
* satisfied immediately after the time constraint is, pushing
* out a discard. If we're visible, the timed content update
* receives presented feedback and the FIFO one blocks further
* updates until the next refresh.
*/
if (timestamped)
wl_surface_commit(wsi_wl_surface->surface);
wp_fifo_v1_wait_barrier(chain->fifo);
}
wl_surface_commit(wsi_wl_surface->surface);
@ -2495,6 +2714,9 @@ wsi_wl_swapchain_chain_free(struct wsi_wl_swapchain *chain,
if (chain->fifo)
wp_fifo_v1_destroy(chain->fifo);
if (chain->commit_timer)
wp_commit_timer_v1_destroy(chain->commit_timer);
wsi_swapchain_finish(&chain->base);
}
@ -2566,6 +2788,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
wp_fifo_v1_destroy(old_chain->fifo);
old_chain->fifo = NULL;
}
if (old_chain->commit_timer) {
wp_commit_timer_v1_destroy(old_chain->commit_timer);
old_chain->commit_timer = NULL;
}
}
/* Take ownership of the wsi_wl_surface */
@ -2720,6 +2946,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->fifo = wp_fifo_manager_v1_get_fifo(dpy->fifo_manager,
chain->wsi_wl_surface->surface);
}
if (dpy->commit_timing_manager) {
chain->commit_timer = wp_commit_timing_manager_v1_get_timer(dpy->commit_timing_manager,
chain->wsi_wl_surface->surface);
}
for (uint32_t i = 0; i < chain->base.image_count; i++) {
result = wsi_wl_image_init(chain, &chain->images[i],