anv+hasvk: Use driconf to disable 16-bit for zink.

The HW can technically execute 16-bit operations, but the restrictions on
16-bit ALU ops are so great that it ends up not being a win for
GLES-on-Vulkan to lower mediump to 16-bit operations, at least with the
current state of the Intel compiler.  This brings zink-on-anv in line with
iris and angle-on-anv for mediump behavior (ANGLE uses RelaxedPrecision,
which we ignore).

Perf on some angle traces on my brya (ADL) and i9-9900K (CFL):

ADL zink pubg_mobile_battle_royale:  +13.4574% +/- 5.2046% (n=5)
CFL zink pubg_mobile_battle_royale:  +29.5332% +/- 0.646585% (n=6)
ADL zink aztec_ruins_high:           +5.78027% +/- 4.80645% (n=4)
CFL zink aztec_ruins_high:           -1.10641% +/- 0.140562% (n=12)
ADL zink trex_200:                   +5.86956% +/- 2.09633% (n=10)
CFL zink trex_200:                   +9.72136% +/- 0.749261% (n=10)

Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21775>
This commit is contained in:
Emma Anholt 2023-03-07 10:44:01 -08:00 committed by Marge Bot
parent daa1468b54
commit 8b75b72613
6 changed files with 38 additions and 12 deletions

View File

@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
DRI_CONF_NO_16BIT(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@ -193,7 +194,7 @@ get_device_extensions(const struct anv_physical_device *device,
*ext = (struct vk_device_extension_table) {
.KHR_8bit_storage = true,
.KHR_16bit_storage = true,
.KHR_16bit_storage = !device->instance->no_16bit,
.KHR_acceleration_structure = rt_enabled,
.KHR_bind_memory2 = true,
.KHR_buffer_device_address = true,
@ -255,7 +256,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_shader_atomic_int64 = true,
.KHR_shader_clock = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_float16_int8 = true,
.KHR_shader_float16_int8 = !device->instance->no_16bit,
.KHR_shader_float_controls = true,
.KHR_shader_integer_dot_product = true,
.KHR_shader_non_semantic_info = true,
@ -1087,6 +1088,9 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
instance->lower_depth_range_rate =
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
instance->no_16bit =
driQueryOptionb(&instance->dri_options, "no_16bit");
instance->fp64_workaround_enabled =
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
instance->generated_indirect_threshold =
@ -1235,8 +1239,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
{
assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
f->storageBuffer16BitAccess = true;
f->uniformAndStorageBuffer16BitAccess = true;
f->storageBuffer16BitAccess = !pdevice->instance->no_16bit;
f->uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit;
f->storagePushConstant16 = true;
f->storageInputOutput16 = false;
f->multiview = true;
@ -1262,8 +1266,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
f->storagePushConstant8 = true;
f->shaderBufferInt64Atomics = true;
f->shaderSharedInt64Atomics = false;
f->shaderFloat16 = true;
f->shaderInt8 = true;
f->shaderFloat16 = !pdevice->instance->no_16bit;
f->shaderInt8 = !pdevice->instance->no_16bit;
f->descriptorIndexing = true;
f->shaderInputAttachmentArrayDynamicIndexing = false;

View File

@ -1045,6 +1045,9 @@ struct anv_instance {
bool fp64_workaround_enabled;
float lower_depth_range_rate;
unsigned generated_indirect_threshold;
/* HW workarounds */
bool no_16bit;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);

View File

@ -69,6 +69,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
DRI_CONF_NO_16BIT(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@ -191,7 +192,7 @@ get_device_extensions(const struct anv_physical_device *device,
*ext = (struct vk_device_extension_table) {
.KHR_8bit_storage = device->info.ver >= 8,
.KHR_16bit_storage = device->info.ver >= 8,
.KHR_16bit_storage = device->info.ver >= 8 && !device->instance->no_16bit,
.KHR_bind_memory2 = true,
.KHR_buffer_device_address = device->has_a64_buffer_access,
.KHR_copy_commands2 = true,
@ -235,7 +236,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_separate_depth_stencil_layouts = true,
.KHR_shader_clock = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_float16_int8 = device->info.ver >= 8,
.KHR_shader_float16_int8 = device->info.ver >= 8 && !device->instance->no_16bit,
.KHR_shader_float_controls = true,
.KHR_shader_integer_dot_product = true,
.KHR_shader_non_semantic_info = true,
@ -1016,6 +1017,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
instance->lower_depth_range_rate =
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
instance->no_16bit =
driQueryOptionb(&instance->dri_options, "no_16bit");
}
VkResult anv_CreateInstance(
@ -1162,8 +1165,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
{
assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
f->storageBuffer16BitAccess = pdevice->info.ver >= 8;
f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8;
f->storageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
f->storagePushConstant16 = pdevice->info.ver >= 8;
f->storageInputOutput16 = false;
f->multiview = true;
@ -1189,8 +1192,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
f->storagePushConstant8 = pdevice->info.ver >= 8;
f->shaderBufferInt64Atomics = false;
f->shaderSharedInt64Atomics = false;
f->shaderFloat16 = pdevice->info.ver >= 8;
f->shaderInt8 = pdevice->info.ver >= 8;
f->shaderFloat16 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
f->shaderInt8 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
f->descriptorIndexing = false;
f->shaderInputAttachmentArrayDynamicIndexing = false;

View File

@ -955,6 +955,9 @@ struct anv_instance {
bool limit_trig_input_range;
bool sample_mask_out_opengl_behaviour;
float lower_depth_range_rate;
/* HW workarounds */
bool no_16bit;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);

View File

@ -1012,6 +1012,15 @@ TODO: document the other workarounds.
<application name="Rise of the Tomb Raider" executable="ROTTR.exe">
<option name="limit_trig_input_range" value="true" />
</application>
<!--
Disable 16-bit features on zink so that GLES mediump doesn't lower to
our inefficient 16-bit shader support. No need to do so for ANGLE,
since it uses RelaxedPrecision decorations, which the intel compiler
ignores.
-->
<engine engine_name_match="mesa zink">
<option name="no_16bit" value="true" />
</engine>
</device>
<device driver="dzn">
<application name="No Man's Sky" executable="NMS.exe">

View File

@ -302,6 +302,10 @@
DRI_CONF_OPT_B(limit_trig_input_range, def, \
"Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel")
/**
 * \brief Declare the "no_16bit" driconf option.
 *
 * \param def  default value passed through to DRI_CONF_OPT_B.
 *
 * NOTE(review): drivers are expected to read this with
 * driQueryOptionb(..., "no_16bit") and use it to stop advertising their
 * 16-bit shader/storage features; the option itself only carries the flag.
 */
#define DRI_CONF_NO_16BIT(def) \
DRI_CONF_OPT_B(no_16bit, def, \
"Disable 16-bit instructions")
/**
* \brief Image quality-related options
*/