Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2024-11-23 18:24:13 +08:00)
intel: add a hasvk vulkan driver
This new driver is a copy of the current Anv code; it will only load on gfx7/8 platforms.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Parent: 0013ef89bf
Commit: 50013ca9a5
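The new driver is selected at build time through the meson 'vulkan-drivers' option extended below. A minimal sketch of enabling it alongside the existing driver when configuring Mesa (the build directory name and the exact driver list here are illustrative assumptions, not part of this commit):

    # Example configure and build with the legacy hasvk driver enabled
    meson setup build/ -Dvulkan-drivers=intel,intel_hasvk
    ninja -C build/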
meson.build (modified)

@@ -250,7 +250,7 @@ _vulkan_drivers = get_option('vulkan-drivers')
 if _vulkan_drivers.contains('auto')
   if system_has_kms_drm
     if host_machine.cpu_family().startswith('x86')
-      _vulkan_drivers = ['amd', 'intel', 'swrast']
+      _vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast']
     elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
       _vulkan_drivers = ['swrast']
     elif ['mips', 'mips64', 'riscv32', 'riscv64'].contains(host_machine.cpu_family())
@@ -269,6 +269,7 @@ if _vulkan_drivers.contains('auto')
 endif

 with_intel_vk = _vulkan_drivers.contains('intel')
+with_intel_hasvk = _vulkan_drivers.contains('intel_hasvk')
 with_amd_vk = _vulkan_drivers.contains('amd')
 with_freedreno_vk = _vulkan_drivers.contains('freedreno')
 with_panfrost_vk = _vulkan_drivers.contains('panfrost')
@@ -283,7 +284,7 @@ with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
 with_any_vk = _vulkan_drivers.length() != 0

 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus or with_intel_tools
+with_any_intel = with_intel_vk or with_intel_hasvk or with_gallium_iris or with_gallium_crocus or with_intel_tools

 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1549,7 +1550,7 @@ endif

 if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
-elif with_intel_vk
+elif with_intel_vk or with_intel_hasvk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
 endif

meson_options.txt (modified)

@@ -198,7 +198,7 @@ option(
   'vulkan-drivers',
   type : 'array',
   value : ['auto'],
-  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
+  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'intel_hasvk', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
   description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(

src/intel/meson.build (modified)

@@ -38,3 +38,6 @@ endif
 if with_intel_vk
   subdir('vulkan')
 endif
+if with_intel_hasvk
+  subdir('vulkan_hasvk')
+endif
src/intel/vulkan_hasvk/TODO (new file, 13 lines)

@@ -0,0 +1,13 @@
Intel Vulkan ToDo
=================

Missing Features:
 - Investigate CTS failures on HSW
 - Sparse memory

Performance:
 - Multi-{sampled/gfx8,LOD} HiZ
 - MSAA fast clears
 - Pushing pieces of UBOs?
 - Enable guardband clipping
 - Use soft-pin to avoid relocations
src/intel/vulkan_hasvk/anv_acceleration_structure.c (new file, 251 lines)

@@ -0,0 +1,251 @@
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

void
anv_GetAccelerationStructureBuildSizesKHR(
    VkDevice device,
    VkAccelerationStructureBuildTypeKHR buildType,
    const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
    const uint32_t* pMaxPrimitiveCounts,
    VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
   assert(pSizeInfo->sType ==
          VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);

   pSizeInfo->accelerationStructureSize = 0; /* TODO */

   uint64_t cpu_build_scratch_size = 0; /* TODO */
   uint64_t cpu_update_scratch_size = cpu_build_scratch_size;

   uint64_t gpu_build_scratch_size = 0; /* TODO */
   uint64_t gpu_update_scratch_size = gpu_build_scratch_size;

   switch (buildType) {
   case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR:
      pSizeInfo->buildScratchSize = cpu_build_scratch_size;
      pSizeInfo->updateScratchSize = cpu_update_scratch_size;
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR:
      pSizeInfo->buildScratchSize = gpu_build_scratch_size;
      pSizeInfo->updateScratchSize = gpu_update_scratch_size;
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_OR_DEVICE_KHR:
      pSizeInfo->buildScratchSize = MAX2(cpu_build_scratch_size,
                                         gpu_build_scratch_size);
      pSizeInfo->updateScratchSize = MAX2(cpu_update_scratch_size,
                                          gpu_update_scratch_size);
      break;

   default:
      unreachable("Invalid acceleration structure build type");
   }
}

VkResult
anv_CreateAccelerationStructureKHR(
    VkDevice _device,
    const VkAccelerationStructureCreateInfoKHR* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkAccelerationStructureKHR* pAccelerationStructure)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
   struct anv_acceleration_structure *accel;

   accel = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (accel == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &accel->base,
                       VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);

   accel->size = pCreateInfo->size;
   accel->address = anv_address_add(buffer->address, pCreateInfo->offset);

   *pAccelerationStructure = anv_acceleration_structure_to_handle(accel);

   return VK_SUCCESS;
}

void
anv_DestroyAccelerationStructureKHR(
    VkDevice _device,
    VkAccelerationStructureKHR accelerationStructure,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_acceleration_structure, accel, accelerationStructure);

   if (!accel)
      return;

   vk_object_base_finish(&accel->base);
   vk_free2(&device->vk.alloc, pAllocator, accel);
}

VkDeviceAddress
anv_GetAccelerationStructureDeviceAddressKHR(
    VkDevice device,
    const VkAccelerationStructureDeviceAddressInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_acceleration_structure, accel,
                   pInfo->accelerationStructure);

   assert(!anv_address_is_null(accel->address));
   assert(anv_bo_is_pinned(accel->address.bo));

   return anv_address_physical(accel->address);
}

void
anv_GetDeviceAccelerationStructureCompatibilityKHR(
    VkDevice device,
    const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
    VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
   unreachable("Unimplemented");
}

VkResult
anv_BuildAccelerationStructuresKHR(
    VkDevice _device,
    VkDeferredOperationKHR deferredOperation,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
anv_CopyAccelerationStructureKHR(
    VkDevice _device,
    VkDeferredOperationKHR deferredOperation,
    const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
anv_CopyAccelerationStructureToMemoryKHR(
    VkDevice _device,
    VkDeferredOperationKHR deferredOperation,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
anv_CopyMemoryToAccelerationStructureKHR(
    VkDevice _device,
    VkDeferredOperationKHR deferredOperation,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
anv_WriteAccelerationStructuresPropertiesKHR(
    VkDevice _device,
    uint32_t accelerationStructureCount,
    const VkAccelerationStructureKHR* pAccelerationStructures,
    VkQueryType queryType,
    size_t dataSize,
    void* pData,
    size_t stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

void
anv_CmdBuildAccelerationStructuresKHR(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   unreachable("Unimplemented");
}

void
anv_CmdBuildAccelerationStructuresIndirectKHR(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress* pIndirectDeviceAddresses,
    const uint32_t* pIndirectStrides,
    const uint32_t* const* ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}

void
anv_CmdCopyAccelerationStructureKHR(
    VkCommandBuffer commandBuffer,
    const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   unreachable("Unimplemented");
}

void
anv_CmdCopyAccelerationStructureToMemoryKHR(
    VkCommandBuffer commandBuffer,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   unreachable("Unimplemented");
}

void
anv_CmdCopyMemoryToAccelerationStructureKHR(
    VkCommandBuffer commandBuffer,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   unreachable("Unimplemented");
}

void
anv_CmdWriteAccelerationStructuresPropertiesKHR(
    VkCommandBuffer commandBuffer,
    uint32_t accelerationStructureCount,
    const VkAccelerationStructureKHR* pAccelerationStructures,
    VkQueryType queryType,
    VkQueryPool queryPool,
    uint32_t firstQuery)
{
   unreachable("Unimplemented");
}
src/intel/vulkan_hasvk/anv_allocator.c (new file, 2176 lines)
File diff suppressed because it is too large.

src/intel/vulkan_hasvk/anv_android.c (new file, 792 lines)

@@ -0,0 +1,792 @@
|
||||
/*
|
||||
* Copyright © 2017, Google Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hardware/gralloc.h>
|
||||
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
#include <hardware/gralloc1.h>
|
||||
#endif
|
||||
|
||||
#include <hardware/hardware.h>
|
||||
#include <hardware/hwvulkan.h>
|
||||
#include <vulkan/vk_android_native_buffer.h>
|
||||
#include <vulkan/vk_icd.h>
|
||||
#include <sync/sync.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
|
||||
static int anv_hal_close(struct hw_device_t *dev);
|
||||
|
||||
static void UNUSED
|
||||
static_asserts(void)
|
||||
{
|
||||
STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
|
||||
}
|
||||
|
||||
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
|
||||
.common = {
|
||||
.tag = HARDWARE_MODULE_TAG,
|
||||
.module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
|
||||
.hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
|
||||
.id = HWVULKAN_HARDWARE_MODULE_ID,
|
||||
.name = "Intel Vulkan HAL",
|
||||
.author = "Intel",
|
||||
.methods = &(hw_module_methods_t) {
|
||||
.open = anv_hal_open,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/* If any bits in test_mask are set, then unset them and return true. */
|
||||
static inline bool
|
||||
unmask32(uint32_t *inout_mask, uint32_t test_mask)
|
||||
{
|
||||
uint32_t orig_mask = *inout_mask;
|
||||
*inout_mask &= ~test_mask;
|
||||
return *inout_mask != orig_mask;
|
||||
}
|
||||
|
||||
static int
|
||||
anv_hal_open(const struct hw_module_t* mod, const char* id,
|
||||
struct hw_device_t** dev)
|
||||
{
|
||||
assert(mod == &HAL_MODULE_INFO_SYM.common);
|
||||
assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
|
||||
|
||||
hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
|
||||
if (!hal_dev)
|
||||
return -1;
|
||||
|
||||
*hal_dev = (hwvulkan_device_t) {
|
||||
.common = {
|
||||
.tag = HARDWARE_DEVICE_TAG,
|
||||
.version = HWVULKAN_DEVICE_API_VERSION_0_1,
|
||||
.module = &HAL_MODULE_INFO_SYM.common,
|
||||
.close = anv_hal_close,
|
||||
},
|
||||
.EnumerateInstanceExtensionProperties = anv_EnumerateInstanceExtensionProperties,
|
||||
.CreateInstance = anv_CreateInstance,
|
||||
.GetInstanceProcAddr = anv_GetInstanceProcAddr,
|
||||
};
|
||||
|
||||
*dev = &hal_dev->common;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
anv_hal_close(struct hw_device_t *dev)
|
||||
{
|
||||
/* hwvulkan.h claims that hw_device_t::close() is never called. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
#include <vndk/hardware_buffer.h>
|
||||
/* See i915_private_android_types.h in minigbm. */
|
||||
#define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100
|
||||
|
||||
enum {
|
||||
/* Usage bit equal to GRALLOC_USAGE_HW_CAMERA_MASK */
|
||||
BUFFER_USAGE_CAMERA_MASK = 0x00060000U,
|
||||
};
|
||||
|
||||
inline VkFormat
|
||||
vk_format_from_android(unsigned android_format, unsigned android_usage)
|
||||
{
|
||||
switch (android_format) {
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
|
||||
return VK_FORMAT_R8G8B8A8_UNORM;
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
|
||||
return VK_FORMAT_R8G8B8_UNORM;
|
||||
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
|
||||
return VK_FORMAT_R5G6B5_UNORM_PACK16;
|
||||
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
|
||||
return VK_FORMAT_R16G16B16A16_SFLOAT;
|
||||
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
|
||||
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
|
||||
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
|
||||
case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL:
|
||||
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
|
||||
case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
|
||||
if (android_usage & BUFFER_USAGE_CAMERA_MASK)
|
||||
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
|
||||
else
|
||||
return VK_FORMAT_R8G8B8_UNORM;
|
||||
case AHARDWAREBUFFER_FORMAT_BLOB:
|
||||
default:
|
||||
return VK_FORMAT_UNDEFINED;
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
android_format_from_vk(unsigned vk_format)
|
||||
{
|
||||
switch (vk_format) {
|
||||
case VK_FORMAT_R8G8B8A8_UNORM:
|
||||
return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
|
||||
case VK_FORMAT_R8G8B8_UNORM:
|
||||
return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM;
|
||||
case VK_FORMAT_R5G6B5_UNORM_PACK16:
|
||||
return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM;
|
||||
case VK_FORMAT_R16G16B16A16_SFLOAT:
|
||||
return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT;
|
||||
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
|
||||
return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM;
|
||||
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
|
||||
#ifdef HAVE_CROS_GRALLOC
|
||||
return AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420;
|
||||
#else
|
||||
return HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL;
|
||||
#endif
|
||||
default:
|
||||
return AHARDWAREBUFFER_FORMAT_BLOB;
|
||||
}
|
||||
}
|
||||
|
||||
static VkFormatFeatureFlags
|
||||
features2_to_features(VkFormatFeatureFlags2 features2)
|
||||
{
|
||||
return features2 & VK_ALL_FORMAT_FEATURE_FLAG_BITS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
get_ahw_buffer_format_properties2(
|
||||
VkDevice device_h,
|
||||
const struct AHardwareBuffer *buffer,
|
||||
VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, device_h);
|
||||
|
||||
/* Get a description of the buffer contents. */
|
||||
AHardwareBuffer_Desc desc;
|
||||
AHardwareBuffer_describe(buffer, &desc);
|
||||
|
||||
/* Verify description. */
|
||||
uint64_t gpu_usage =
|
||||
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
|
||||
AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
|
||||
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
|
||||
|
||||
/* "Buffer must be a valid Android hardware buffer object with at least
|
||||
* one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
|
||||
*/
|
||||
if (!(desc.usage & (gpu_usage)))
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
|
||||
|
||||
/* Fill properties fields based on description. */
|
||||
VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties;
|
||||
|
||||
p->format = vk_format_from_android(desc.format, desc.usage);
|
||||
|
||||
const struct anv_format *anv_format = anv_get_format(p->format);
|
||||
p->externalFormat = (uint64_t) (uintptr_t) anv_format;
|
||||
|
||||
/* Default to OPTIMAL tiling but set to linear in case
|
||||
* of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage.
|
||||
*/
|
||||
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
|
||||
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
|
||||
tiling = VK_IMAGE_TILING_LINEAR;
|
||||
|
||||
p->formatFeatures =
|
||||
anv_get_image_format_features2(device->info, p->format, anv_format,
|
||||
tiling, NULL);
|
||||
|
||||
/* "Images can be created with an external format even if the Android hardware
|
||||
* buffer has a format which has an equivalent Vulkan format to enable
|
||||
* consistent handling of images from sources that might use either category
|
||||
* of format. However, all images created with an external format are subject
|
||||
* to the valid usage requirements associated with external formats, even if
|
||||
* the Android hardware buffer’s format has a Vulkan equivalent."
|
||||
*
|
||||
* "The formatFeatures member *must* include
|
||||
* VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
|
||||
* VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
|
||||
* VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
|
||||
*/
|
||||
p->formatFeatures |=
|
||||
VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
|
||||
|
||||
/* "Implementations may not always be able to determine the color model,
|
||||
* numerical range, or chroma offsets of the image contents, so the values
|
||||
* in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
|
||||
* Applications should treat these values as sensible defaults to use in
|
||||
* the absence of more reliable information obtained through some other
|
||||
* means."
|
||||
*/
|
||||
p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
|
||||
p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
|
||||
p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
|
||||
|
||||
p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
|
||||
p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_GetAndroidHardwareBufferPropertiesANDROID(
|
||||
VkDevice device_h,
|
||||
const struct AHardwareBuffer *buffer,
|
||||
VkAndroidHardwareBufferPropertiesANDROID *pProperties)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, dev, device_h);
|
||||
|
||||
VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
|
||||
vk_find_struct(pProperties->pNext,
|
||||
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
|
||||
/* Fill format properties of an Android hardware buffer. */
|
||||
if (format_prop) {
|
||||
VkAndroidHardwareBufferFormatProperties2ANDROID format_prop2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID,
|
||||
};
|
||||
get_ahw_buffer_format_properties2(device_h, buffer, &format_prop2);
|
||||
|
||||
format_prop->format = format_prop2.format;
|
||||
format_prop->externalFormat = format_prop2.externalFormat;
|
||||
format_prop->formatFeatures =
|
||||
features2_to_features(format_prop2.formatFeatures);
|
||||
format_prop->samplerYcbcrConversionComponents =
|
||||
format_prop2.samplerYcbcrConversionComponents;
|
||||
format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel;
|
||||
format_prop->suggestedYcbcrRange = format_prop2.suggestedYcbcrRange;
|
||||
format_prop->suggestedXChromaOffset = format_prop2.suggestedXChromaOffset;
|
||||
format_prop->suggestedYChromaOffset = format_prop2.suggestedYChromaOffset;
|
||||
}
|
||||
|
||||
VkAndroidHardwareBufferFormatProperties2ANDROID *format_prop2 =
|
||||
vk_find_struct(pProperties->pNext,
|
||||
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID);
|
||||
if (format_prop2)
|
||||
get_ahw_buffer_format_properties2(device_h, buffer, format_prop2);
|
||||
|
||||
/* NOTE - We support buffers with only one handle but do not error on
|
||||
* multiple handle case. Reason is that we want to support YUV formats
|
||||
* where we have many logical planes but they all point to the same
|
||||
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
|
||||
*/
|
||||
const native_handle_t *handle =
|
||||
AHardwareBuffer_getNativeHandle(buffer);
|
||||
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
|
||||
if (dma_buf < 0)
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
|
||||
|
||||
/* All memory types. */
|
||||
uint32_t memory_types = (1ull << dev->physical->memory.type_count) - 1;
|
||||
|
||||
pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
|
||||
pProperties->memoryTypeBits = memory_types;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_GetMemoryAndroidHardwareBufferANDROID(
|
||||
VkDevice device_h,
|
||||
const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
|
||||
struct AHardwareBuffer **pBuffer)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory);
|
||||
|
||||
/* Some quotes from Vulkan spec:
|
||||
*
|
||||
* "If the device memory was created by importing an Android hardware
|
||||
* buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
|
||||
* Android hardware buffer object."
|
||||
*
|
||||
* "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
|
||||
* have been included in VkExportMemoryAllocateInfo::handleTypes when
|
||||
* memory was created."
|
||||
*/
|
||||
if (mem->ahw) {
|
||||
*pBuffer = mem->ahw;
|
||||
/* Increase refcount. */
|
||||
AHardwareBuffer_acquire(mem->ahw);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Construct ahw usage mask from image usage bits, see
|
||||
* 'AHardwareBuffer Usage Equivalence' in Vulkan spec.
|
||||
*/
|
||||
uint64_t
|
||||
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
|
||||
const VkImageUsageFlags vk_usage)
|
||||
{
|
||||
uint64_t ahw_usage = 0;
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
|
||||
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
|
||||
|
||||
if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
|
||||
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
|
||||
|
||||
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
|
||||
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT;
|
||||
|
||||
if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
|
||||
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP;
|
||||
|
||||
if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT)
|
||||
ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT;
|
||||
|
||||
/* No usage bits set - set at least one GPU usage. */
|
||||
if (ahw_usage == 0)
|
||||
ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
|
||||
#endif
|
||||
return ahw_usage;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from anv_AllocateMemory when importing an AHardwareBuffer.
|
||||
*/
|
||||
VkResult
|
||||
anv_import_ahw_memory(VkDevice device_h,
|
||||
struct anv_device_memory *mem,
|
||||
const VkImportAndroidHardwareBufferInfoANDROID *info)
|
||||
{
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
ANV_FROM_HANDLE(anv_device, device, device_h);
|
||||
|
||||
/* Import from AHardwareBuffer to anv_device_memory. */
|
||||
const native_handle_t *handle =
|
||||
AHardwareBuffer_getNativeHandle(info->buffer);
|
||||
|
||||
/* NOTE - We support buffers with only one handle but do not error on
|
||||
* multiple handle case. Reason is that we want to support YUV formats
|
||||
* where we have many logical planes but they all point to the same
|
||||
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
|
||||
*/
|
||||
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
|
||||
if (dma_buf < 0)
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
|
||||
|
||||
VkResult result = anv_device_import_bo(device, dma_buf, 0,
|
||||
0 /* client_address */,
|
||||
&mem->bo);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
/* "If the vkAllocateMemory command succeeds, the implementation must
|
||||
* acquire a reference to the imported hardware buffer, which it must
|
||||
* release when the device memory object is freed. If the command fails,
|
||||
* the implementation must not retain a reference."
|
||||
*/
|
||||
AHardwareBuffer_acquire(info->buffer);
|
||||
mem->ahw = info->buffer;
|
||||
|
||||
return VK_SUCCESS;
|
||||
#else
|
||||
return VK_ERROR_EXTENSION_NOT_PRESENT;
|
||||
#endif
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_create_ahw_memory(VkDevice device_h,
|
||||
struct anv_device_memory *mem,
|
||||
const VkMemoryAllocateInfo *pAllocateInfo)
|
||||
{
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
const VkMemoryDedicatedAllocateInfo *dedicated_info =
|
||||
vk_find_struct_const(pAllocateInfo->pNext,
|
||||
MEMORY_DEDICATED_ALLOCATE_INFO);
|
||||
|
||||
uint32_t w = 0;
|
||||
uint32_t h = 1;
|
||||
uint32_t layers = 1;
|
||||
uint32_t format = 0;
|
||||
uint64_t usage = 0;
|
||||
|
||||
/* If caller passed dedicated information. */
|
||||
if (dedicated_info && dedicated_info->image) {
|
||||
ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
|
||||
w = image->vk.extent.width;
|
||||
h = image->vk.extent.height;
|
||||
layers = image->vk.array_layers;
|
||||
format = android_format_from_vk(image->vk.format);
|
||||
usage = anv_ahw_usage_from_vk_usage(image->vk.create_flags, image->vk.usage);
|
||||
} else if (dedicated_info && dedicated_info->buffer) {
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer);
|
||||
w = buffer->vk.size;
|
||||
format = AHARDWAREBUFFER_FORMAT_BLOB;
|
||||
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
|
||||
} else {
|
||||
w = pAllocateInfo->allocationSize;
|
||||
format = AHARDWAREBUFFER_FORMAT_BLOB;
|
||||
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
|
||||
}
|
||||
|
||||
struct AHardwareBuffer *ahw = NULL;
|
||||
struct AHardwareBuffer_Desc desc = {
|
||||
.width = w,
|
||||
.height = h,
|
||||
.layers = layers,
|
||||
.format = format,
|
||||
.usage = usage,
|
||||
};
|
||||
|
||||
if (AHardwareBuffer_allocate(&desc, &ahw) != 0)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
const VkImportAndroidHardwareBufferInfoANDROID import_info = {
|
||||
.buffer = ahw,
|
||||
};
|
||||
VkResult result = anv_import_ahw_memory(device_h, mem, &import_info);
|
||||
|
||||
/* Release a reference to avoid leak for AHB allocation. */
|
||||
AHardwareBuffer_release(ahw);
|
||||
|
||||
return result;
|
||||
#else
|
||||
return VK_ERROR_EXTENSION_NOT_PRESENT;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_image_init_from_gralloc(struct anv_device *device,
|
||||
struct anv_image *image,
|
||||
const VkImageCreateInfo *base_info,
|
||||
const VkNativeBufferANDROID *gralloc_info)
|
||||
{
|
||||
struct anv_bo *bo = NULL;
|
||||
VkResult result;
|
||||
|
||||
struct anv_image_create_info anv_info = {
|
||||
.vk_info = base_info,
|
||||
.isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT,
|
||||
};
|
||||
|
||||
if (gralloc_info->handle->numFds != 1) {
|
||||
return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
|
||||
"VkNativeBufferANDROID::handle::numFds is %d, "
|
||||
"expected 1", gralloc_info->handle->numFds);
|
||||
}
|
||||
|
||||
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
|
||||
* must exceed that of the gralloc handle, and we do not own the gralloc
|
||||
* handle.
|
||||
*/
|
||||
int dma_buf = gralloc_info->handle->data[0];
|
||||
|
||||
/* We need to set the WRITE flag on window system buffers so that GEM will
|
||||
* know we're writing to them and synchronize uses on other rings (for
|
||||
* example, if the display server uses the blitter ring).
|
||||
*
|
||||
* If this function fails and if the imported bo was resident in the cache,
|
||||
* we should avoid updating the bo's flags. Therefore, we defer updating
|
||||
* the flags until success is certain.
|
||||
*
|
||||
*/
|
||||
result = anv_device_import_bo(device, dma_buf,
|
||||
ANV_BO_ALLOC_IMPLICIT_SYNC |
|
||||
ANV_BO_ALLOC_IMPLICIT_WRITE,
|
||||
0 /* client_address */,
|
||||
&bo);
|
||||
if (result != VK_SUCCESS) {
|
||||
return vk_errorf(device, result,
|
||||
"failed to import dma-buf from VkNativeBufferANDROID");
|
||||
}
|
||||
|
||||
enum isl_tiling tiling;
|
||||
result = anv_device_get_bo_tiling(device, bo, &tiling);
|
||||
if (result != VK_SUCCESS) {
|
||||
return vk_errorf(device, result,
|
||||
"failed to get tiling from VkNativeBufferANDROID");
|
||||
}
|
||||
anv_info.isl_tiling_flags = 1u << tiling;
|
||||
|
||||
enum isl_format format = anv_get_isl_format(device->info,
|
||||
base_info->format,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
base_info->tiling);
|
||||
assert(format != ISL_FORMAT_UNSUPPORTED);
|
||||
|
||||
result = anv_image_init(device, image, &anv_info);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_init;
|
||||
|
||||
VkMemoryRequirements2 mem_reqs = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
|
||||
};
|
||||
|
||||
anv_image_get_memory_requirements(device, image, image->vk.aspects,
|
||||
&mem_reqs);
|
||||
|
||||
VkDeviceSize aligned_image_size =
|
||||
align_u64(mem_reqs.memoryRequirements.size,
|
||||
mem_reqs.memoryRequirements.alignment);
|
||||
|
||||
if (bo->size < aligned_image_size) {
|
||||
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
|
||||
"dma-buf from VkNativeBufferANDROID is too small for "
|
||||
"VkImage: %"PRIu64"B < %"PRIu64"B",
|
||||
bo->size, aligned_image_size);
|
||||
goto fail_size;
|
||||
}
|
||||
|
||||
assert(!image->disjoint);
|
||||
assert(image->n_planes == 1);
|
||||
assert(image->planes[0].primary_surface.memory_range.binding ==
|
||||
ANV_IMAGE_MEMORY_BINDING_MAIN);
|
||||
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
|
||||
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
|
||||
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
|
||||
image->from_gralloc = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_size:
|
||||
anv_image_finish(image);
|
||||
fail_init:
|
||||
anv_device_release_bo(device, bo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_image_bind_from_gralloc(struct anv_device *device,
|
||||
struct anv_image *image,
|
||||
const VkNativeBufferANDROID *gralloc_info)
|
||||
{
|
||||
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
|
||||
* must exceed that of the gralloc handle, and we do not own the gralloc
|
||||
* handle.
|
||||
*/
|
||||
int dma_buf = gralloc_info->handle->data[0];
|
||||
|
||||
/* We need to set the WRITE flag on window system buffers so that GEM will
|
||||
* know we're writing to them and synchronize uses on other rings (for
|
||||
* example, if the display server uses the blitter ring).
|
||||
*
|
||||
* If this function fails and if the imported bo was resident in the cache,
|
||||
* we should avoid updating the bo's flags. Therefore, we defer updating
|
||||
* the flags until success is certain.
|
||||
*
|
||||
*/
|
||||
struct anv_bo *bo = NULL;
|
||||
VkResult result = anv_device_import_bo(device, dma_buf,
|
||||
ANV_BO_ALLOC_IMPLICIT_SYNC |
|
||||
ANV_BO_ALLOC_IMPLICIT_WRITE,
|
||||
0 /* client_address */,
|
||||
&bo);
|
||||
if (result != VK_SUCCESS) {
|
||||
return vk_errorf(device, result,
|
||||
"failed to import dma-buf from VkNativeBufferANDROID");
|
||||
}
|
||||
|
||||
uint64_t img_size = image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].memory_range.size;
|
||||
if (img_size < bo->size) {
|
||||
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
|
||||
"dma-buf from VkNativeBufferANDROID is too small for "
|
||||
"VkImage: %"PRIu64"B < %"PRIu64"B",
|
||||
bo->size, img_size);
|
||||
anv_device_release_bo(device, bo);
|
||||
return result;
|
||||
}
|
||||
|
||||
assert(!image->disjoint);
|
||||
assert(image->n_planes == 1);
|
||||
assert(image->planes[0].primary_surface.memory_range.binding ==
|
||||
ANV_IMAGE_MEMORY_BINDING_MAIN);
|
||||
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
|
||||
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
|
||||
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
|
||||
image->from_gralloc = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
format_supported_with_usage(VkDevice device_h, VkFormat format,
|
||||
VkImageUsageFlags imageUsage)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, device_h);
|
||||
VkPhysicalDevice phys_dev_h = anv_physical_device_to_handle(device->physical);
|
||||
VkResult result;
|
||||
|
||||
const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
|
||||
.format = format,
|
||||
.type = VK_IMAGE_TYPE_2D,
|
||||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = imageUsage,
|
||||
};
|
||||
|
||||
VkImageFormatProperties2 image_format_props = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
|
||||
};
|
||||
|
||||
/* Check that requested format and usage are supported. */
|
||||
result = anv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
|
||||
&image_format_info, &image_format_props);
|
||||
if (result != VK_SUCCESS) {
|
||||
return vk_errorf(device, result,
|
||||
"anv_GetPhysicalDeviceImageFormatProperties2 failed "
|
||||
"inside %s", __func__);
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static VkResult
|
||||
setup_gralloc0_usage(struct anv_device *device, VkFormat format,
|
||||
VkImageUsageFlags imageUsage, int *grallocUsage)
|
||||
{
|
||||
/* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
|
||||
* returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
|
||||
* The relevant code in libvulkan/swapchain.cpp contains this fun comment:
|
||||
*
|
||||
* TODO(jessehall): I think these are right, but haven't thought hard
|
||||
* about it. Do we need to query the driver for support of any of
|
||||
* these?
|
||||
*
|
||||
* Any disagreement between this function and the hardcoded
|
||||
* VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
|
||||
* dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
|
||||
*/
|
||||
|
||||
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
|
||||
|
||||
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
VK_IMAGE_USAGE_STORAGE_BIT |
|
||||
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
|
||||
|
||||
/* All VkImageUsageFlags not explicitly checked here are unsupported for
|
||||
* gralloc swapchains.
|
||||
*/
|
||||
if (imageUsage != 0) {
|
||||
return vk_errorf(device, VK_ERROR_FORMAT_NOT_SUPPORTED,
|
||||
"unsupported VkImageUsageFlags(0x%x) for gralloc "
|
||||
"swapchain", imageUsage);
|
||||
}
|
||||
|
||||
/* The below formats support GRALLOC_USAGE_HW_FB (that is, display
|
||||
* scanout). This short list of formats is universally supported on Intel
|
||||
* but is incomplete. The full set of supported formats is dependent on
|
||||
* kernel and hardware.
|
||||
*
|
||||
* FINISHME: Advertise all display-supported formats.
|
||||
*/
|
||||
switch (format) {
|
||||
case VK_FORMAT_B8G8R8A8_UNORM:
|
||||
case VK_FORMAT_R5G6B5_UNORM_PACK16:
|
||||
case VK_FORMAT_R8G8B8A8_UNORM:
|
||||
case VK_FORMAT_R8G8B8A8_SRGB:
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_FB |
|
||||
GRALLOC_USAGE_HW_COMPOSER |
|
||||
GRALLOC_USAGE_EXTERNAL_DISP;
|
||||
break;
|
||||
default:
|
||||
mesa_logw("%s: unsupported format=%d", __func__, format);
|
||||
}
|
||||
|
||||
if (*grallocUsage == 0)
|
||||
return VK_ERROR_FORMAT_NOT_SUPPORTED;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#if ANDROID_API_LEVEL >= 26
|
||||
VkResult anv_GetSwapchainGrallocUsage2ANDROID(
|
||||
VkDevice device_h,
|
||||
VkFormat format,
|
||||
VkImageUsageFlags imageUsage,
|
||||
VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
|
||||
uint64_t* grallocConsumerUsage,
|
||||
uint64_t* grallocProducerUsage)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, device_h);
|
||||
VkResult result;
|
||||
|
||||
*grallocConsumerUsage = 0;
|
||||
*grallocProducerUsage = 0;
|
||||
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
|
||||
|
||||
result = format_supported_with_usage(device_h, format, imageUsage);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
int32_t grallocUsage = 0;
|
||||
result = setup_gralloc0_usage(device, format, imageUsage, &grallocUsage);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* Setup gralloc1 usage flags from gralloc0 flags. */
|
||||
|
||||
if (grallocUsage & GRALLOC_USAGE_HW_RENDER) {
|
||||
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
|
||||
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
|
||||
}
|
||||
|
||||
if (grallocUsage & GRALLOC_USAGE_HW_TEXTURE) {
|
||||
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
|
||||
}
|
||||
|
||||
if (grallocUsage & (GRALLOC_USAGE_HW_FB |
|
||||
GRALLOC_USAGE_HW_COMPOSER |
|
||||
GRALLOC_USAGE_EXTERNAL_DISP)) {
|
||||
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
|
||||
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
VkResult anv_GetSwapchainGrallocUsageANDROID(
|
||||
VkDevice device_h,
|
||||
VkFormat format,
|
||||
VkImageUsageFlags imageUsage,
|
||||
int* grallocUsage)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, device_h);
|
||||
VkResult result;
|
||||
|
||||
*grallocUsage = 0;
|
||||
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
|
||||
|
||||
result = format_supported_with_usage(device_h, format, imageUsage);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
return setup_gralloc0_usage(device, format, imageUsage, grallocUsage);
|
||||
}
|
src/intel/vulkan_hasvk/anv_android.h (new file, 57 lines)

@@ -0,0 +1,57 @@
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef ANV_ANDROID_H
#define ANV_ANDROID_H

#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
#include <vndk/hardware_buffer.h>
#endif
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>
#include <vulkan/vk_android_native_buffer.h>

struct anv_device_memory;
struct anv_device;
struct anv_image;

VkResult anv_image_init_from_gralloc(struct anv_device *device,
                                     struct anv_image *image,
                                     const VkImageCreateInfo *base_info,
                                     const VkNativeBufferANDROID *gralloc_info);

VkResult anv_image_bind_from_gralloc(struct anv_device *device,
                                     struct anv_image *image,
                                     const VkNativeBufferANDROID *gralloc_info);

uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
                                     const VkImageUsageFlags vk_usage);

VkResult anv_import_ahw_memory(VkDevice device_h,
                               struct anv_device_memory *mem,
                               const VkImportAndroidHardwareBufferInfoANDROID *info);

VkResult anv_create_ahw_memory(VkDevice device_h,
                               struct anv_device_memory *mem,
                               const VkMemoryAllocateInfo *pAllocateInfo);
#endif /* ANV_ANDROID_H */
src/intel/vulkan_hasvk/anv_android_stubs.c (new file, 63 lines)

@@ -0,0 +1,63 @@
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_android.h"

VkResult
anv_image_init_from_gralloc(struct anv_device *device,
                            struct anv_image *image,
                            const VkImageCreateInfo *base_info,
                            const VkNativeBufferANDROID *gralloc_info)
{
   return VK_ERROR_EXTENSION_NOT_PRESENT;
}

VkResult anv_image_bind_from_gralloc(struct anv_device *device,
                                     struct anv_image *image,
                                     const VkNativeBufferANDROID *gralloc_info)
{
   return VK_ERROR_EXTENSION_NOT_PRESENT;
}

uint64_t
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
                            const VkImageUsageFlags vk_usage)
{
   return 0;
}

VkResult
anv_import_ahw_memory(VkDevice device_h,
                      struct anv_device_memory *mem,
                      const VkImportAndroidHardwareBufferInfoANDROID *info)
{
   return VK_ERROR_EXTENSION_NOT_PRESENT;
}

VkResult
anv_create_ahw_memory(VkDevice device_h,
                      struct anv_device_memory *mem,
                      const VkMemoryAllocateInfo *pAllocateInfo)
{
   return VK_ERROR_EXTENSION_NOT_PRESENT;
}
src/intel/vulkan_hasvk/anv_batch_chain.c (new file, 2477 lines)
File diff suppressed because it is too large.

src/intel/vulkan_hasvk/anv_blorp.c (new file, 1995 lines)
File diff suppressed because it is too large.

src/intel/vulkan_hasvk/anv_bo_sync.c (new file, 237 lines)

@@ -0,0 +1,237 @@
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "util/os_time.h"
|
||||
|
||||
static struct anv_bo_sync *
|
||||
to_anv_bo_sync(struct vk_sync *sync)
|
||||
{
|
||||
assert(sync->type == &anv_bo_sync_type);
|
||||
return container_of(sync, struct anv_bo_sync, sync);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_bo_sync_init(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync,
|
||||
uint64_t initial_value)
|
||||
{
|
||||
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
|
||||
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
|
||||
|
||||
sync->state = initial_value ? ANV_BO_SYNC_STATE_SIGNALED :
|
||||
ANV_BO_SYNC_STATE_RESET;
|
||||
|
||||
return anv_device_alloc_bo(device, "bo-sync", 4096,
|
||||
ANV_BO_ALLOC_EXTERNAL |
|
||||
ANV_BO_ALLOC_IMPLICIT_SYNC,
|
||||
0 /* explicit_address */,
|
||||
&sync->bo);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_bo_sync_finish(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync)
|
||||
{
|
||||
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
|
||||
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
|
||||
|
||||
anv_device_release_bo(device, sync->bo);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_bo_sync_reset(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync)
|
||||
{
|
||||
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
|
||||
|
||||
sync->state = ANV_BO_SYNC_STATE_RESET;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static int64_t
|
||||
anv_get_relative_timeout(uint64_t abs_timeout)
|
||||
{
|
||||
uint64_t now = os_time_get_nano();
|
||||
|
||||
/* We don't want negative timeouts.
|
||||
*
|
||||
* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is
|
||||
* supposed to block indefinitely for timeouts < 0. Unfortunately,
|
||||
* this was broken for a couple of kernel releases. Since there's
|
||||
* no way to know whether or not the kernel we're using is one of
|
||||
* the broken ones, the best we can do is to clamp the timeout to
|
||||
* INT64_MAX. This limits the maximum timeout from 584 years to
|
||||
* 292 years - likely not a big deal.
|
||||
*/
|
||||
if (abs_timeout < now)
|
||||
return 0;
|
||||
|
||||
uint64_t rel_timeout = abs_timeout - now;
|
||||
if (rel_timeout > (uint64_t) INT64_MAX)
|
||||
rel_timeout = INT64_MAX;
|
||||
|
||||
return rel_timeout;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_bo_sync_wait(struct vk_device *vk_device,
|
||||
uint32_t wait_count,
|
||||
const struct vk_sync_wait *waits,
|
||||
enum vk_sync_wait_flags wait_flags,
|
||||
uint64_t abs_timeout_ns)
|
||||
{
|
||||
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
|
||||
VkResult result;
|
||||
|
||||
uint32_t pending = wait_count;
|
||||
while (pending) {
|
||||
pending = 0;
|
||||
bool signaled = false;
|
||||
for (uint32_t i = 0; i < wait_count; i++) {
|
||||
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
|
||||
switch (sync->state) {
|
||||
case ANV_BO_SYNC_STATE_RESET:
|
||||
/* This fence hasn't been submitted yet, we'll catch it the next
|
||||
* time around. Yes, this may mean we dead-loop but, short of
|
||||
* lots of locking and a condition variable, there's not much that
|
||||
* we can do about that.
|
||||
*/
|
||||
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
|
||||
pending++;
|
||||
continue;
|
||||
|
||||
case ANV_BO_SYNC_STATE_SIGNALED:
|
||||
/* This fence is not pending. If waitAll isn't set, we can return
|
||||
* early. Otherwise, we have to keep going.
|
||||
*/
|
||||
if (wait_flags & VK_SYNC_WAIT_ANY)
|
||||
return VK_SUCCESS;
|
||||
continue;
|
||||
|
||||
case ANV_BO_SYNC_STATE_SUBMITTED:
|
||||
/* These are the fences we really care about. Go ahead and wait
|
||||
* on it until we hit a timeout.
|
||||
*/
|
||||
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
|
||||
uint64_t rel_timeout = anv_get_relative_timeout(abs_timeout_ns);
|
||||
result = anv_device_wait(device, sync->bo, rel_timeout);
|
||||
/* This also covers VK_TIMEOUT */
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
sync->state = ANV_BO_SYNC_STATE_SIGNALED;
|
||||
signaled = true;
|
||||
}
|
||||
if (wait_flags & VK_SYNC_WAIT_ANY)
|
||||
return VK_SUCCESS;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid BO sync state");
|
||||
}
|
||||
}
|
||||
|
||||
if (pending && !signaled) {
|
||||
/* If we've hit this then someone decided to vkWaitForFences before
|
||||
* they've actually submitted any of them to a queue. This is a
|
||||
* fairly pessimal case, so it's ok to lock here and use a standard
|
||||
* pthreads condition variable.
|
||||
*/
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
|
||||
/* It's possible that some of the fences have changed state since the
|
||||
* last time we checked. Now that we have the lock, check for
|
||||
* pending fences again and don't wait if it's changed.
|
||||
*/
|
||||
uint32_t now_pending = 0;
|
||||
for (uint32_t i = 0; i < wait_count; i++) {
|
||||
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
|
||||
if (sync->state == ANV_BO_SYNC_STATE_RESET)
|
||||
now_pending++;
|
||||
}
|
||||
assert(now_pending <= pending);
|
||||
|
||||
if (now_pending == pending) {
|
||||
struct timespec abstime = {
|
||||
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
|
||||
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
|
||||
};
|
||||
|
||||
ASSERTED int ret;
|
||||
ret = pthread_cond_timedwait(&device->queue_submit,
|
||||
&device->mutex, &abstime);
|
||||
assert(ret != EINVAL);
|
||||
if (os_time_get_nano() >= abs_timeout_ns) {
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return VK_TIMEOUT;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
const struct vk_sync_type anv_bo_sync_type = {
|
||||
.size = sizeof(struct anv_bo_sync),
|
||||
.features = VK_SYNC_FEATURE_BINARY |
|
||||
VK_SYNC_FEATURE_GPU_WAIT |
|
||||
VK_SYNC_FEATURE_GPU_MULTI_WAIT |
|
||||
VK_SYNC_FEATURE_CPU_WAIT |
|
||||
VK_SYNC_FEATURE_CPU_RESET |
|
||||
VK_SYNC_FEATURE_WAIT_ANY |
|
||||
VK_SYNC_FEATURE_WAIT_PENDING,
|
||||
.init = anv_bo_sync_init,
|
||||
.finish = anv_bo_sync_finish,
|
||||
.reset = anv_bo_sync_reset,
|
||||
.wait_many = anv_bo_sync_wait,
|
||||
};
|
||||
|
||||
VkResult
|
||||
anv_create_sync_for_memory(struct vk_device *device,
|
||||
VkDeviceMemory memory,
|
||||
bool signal_memory,
|
||||
struct vk_sync **sync_out)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device_memory, mem, memory);
|
||||
struct anv_bo_sync *bo_sync;
|
||||
|
||||
bo_sync = vk_zalloc(&device->alloc, sizeof(*bo_sync), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (bo_sync == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
bo_sync->sync.type = &anv_bo_sync_type;
|
||||
bo_sync->state = signal_memory ? ANV_BO_SYNC_STATE_RESET :
|
||||
ANV_BO_SYNC_STATE_SUBMITTED;
|
||||
bo_sync->bo = anv_bo_ref(mem->bo);
|
||||
|
||||
*sync_out = &bo_sync->sync;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
1112
src/intel/vulkan_hasvk/anv_cmd_buffer.c
Normal file
1112
src/intel/vulkan_hasvk/anv_cmd_buffer.c
Normal file
File diff suppressed because it is too large
Load Diff
2046
src/intel/vulkan_hasvk/anv_descriptor_set.c
Normal file
2046
src/intel/vulkan_hasvk/anv_descriptor_set.c
Normal file
File diff suppressed because it is too large
Load Diff
4834
src/intel/vulkan_hasvk/anv_device.c
Normal file
4834
src/intel/vulkan_hasvk/anv_device.c
Normal file
File diff suppressed because it is too large
Load Diff
1745
src/intel/vulkan_hasvk/anv_formats.c
Normal file
1745
src/intel/vulkan_hasvk/anv_formats.c
Normal file
File diff suppressed because it is too large
Load Diff
405
src/intel/vulkan_hasvk/anv_gem.c
Normal file
405
src/intel/vulkan_hasvk/anv_gem.c
Normal file
@ -0,0 +1,405 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "common/intel_defines.h"
|
||||
#include "common/intel_gem.h"
|
||||
|
||||
/**
|
||||
* Wrapper around DRM_IOCTL_I915_GEM_CREATE.
|
||||
*
|
||||
* Return gem handle, or 0 on failure. Gem handles are never 0.
|
||||
*/
|
||||
uint32_t
|
||||
anv_gem_create(struct anv_device *device, uint64_t size)
|
||||
{
|
||||
struct drm_i915_gem_create gem_create = {
|
||||
.size = size,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
|
||||
if (ret != 0) {
|
||||
/* FIXME: What do we do if this fails? */
|
||||
return 0;
|
||||
}
|
||||
|
||||
return gem_create.handle;
|
||||
}
|
||||
|
||||
void
|
||||
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
struct drm_gem_close close = {
|
||||
.handle = gem_handle,
|
||||
};
|
||||
|
||||
intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
||||
uint32_t flags, uint32_t num_regions,
|
||||
struct drm_i915_gem_memory_class_instance *regions)
|
||||
{
|
||||
/* Check for invalid flags */
|
||||
assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
|
||||
|
||||
struct drm_i915_gem_create_ext_memory_regions ext_regions = {
|
||||
.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
|
||||
.num_regions = num_regions,
|
||||
.regions = (uintptr_t)regions,
|
||||
};
|
||||
|
||||
struct drm_i915_gem_create_ext gem_create = {
|
||||
.size = anv_bo_size,
|
||||
.extensions = (uintptr_t) &ext_regions,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,
|
||||
&gem_create);
|
||||
if (ret != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return gem_create.handle;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
|
||||
*/
|
||||
static void*
|
||||
anv_gem_mmap_offset(struct anv_device *device, uint32_t gem_handle,
|
||||
uint64_t offset, uint64_t size, uint32_t flags)
|
||||
{
|
||||
struct drm_i915_gem_mmap_offset gem_mmap = {
|
||||
.handle = gem_handle,
|
||||
.flags = device->info->has_local_mem ? I915_MMAP_OFFSET_FIXED :
|
||||
(flags & I915_MMAP_WC) ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
|
||||
};
|
||||
assert(offset == 0);
|
||||
|
||||
/* Get the fake offset back */
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_mmap);
|
||||
if (ret != 0)
|
||||
return MAP_FAILED;
|
||||
|
||||
/* And map it */
|
||||
void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
device->fd, gem_mmap.offset);
|
||||
return map;
|
||||
}
|
||||
|
||||
static void*
|
||||
anv_gem_mmap_legacy(struct anv_device *device, uint32_t gem_handle,
|
||||
uint64_t offset, uint64_t size, uint32_t flags)
|
||||
{
|
||||
assert(!device->info->has_local_mem);
|
||||
|
||||
struct drm_i915_gem_mmap gem_mmap = {
|
||||
.handle = gem_handle,
|
||||
.offset = offset,
|
||||
.size = size,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
|
||||
if (ret != 0)
|
||||
return MAP_FAILED;
|
||||
|
||||
return (void *)(uintptr_t) gem_mmap.addr_ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
|
||||
*/
|
||||
void*
|
||||
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
|
||||
uint64_t offset, uint64_t size, uint32_t flags)
|
||||
{
|
||||
void *map;
|
||||
if (device->physical->has_mmap_offset)
|
||||
map = anv_gem_mmap_offset(device, gem_handle, offset, size, flags);
|
||||
else
|
||||
map = anv_gem_mmap_legacy(device, gem_handle, offset, size, flags);
|
||||
|
||||
if (map != MAP_FAILED)
|
||||
VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1));
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/* This is just a wrapper around munmap, but it also notifies valgrind that
|
||||
* this map is no longer valid. Pair this with anv_gem_mmap().
|
||||
*/
|
||||
void
|
||||
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
|
||||
{
|
||||
VG(VALGRIND_FREELIKE_BLOCK(p, 0));
|
||||
munmap(p, size);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
|
||||
{
|
||||
struct drm_i915_gem_userptr userptr = {
|
||||
.user_ptr = (__u64)((unsigned long) mem),
|
||||
.user_size = size,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
if (device->physical->has_userptr_probe)
|
||||
userptr.flags |= I915_USERPTR_PROBE;
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
|
||||
if (ret == -1)
|
||||
return 0;
|
||||
|
||||
return userptr.handle;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_caching(struct anv_device *device,
|
||||
uint32_t gem_handle, uint32_t caching)
|
||||
{
|
||||
struct drm_i915_gem_caching gem_caching = {
|
||||
.handle = gem_handle,
|
||||
.caching = caching,
|
||||
};
|
||||
|
||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching);
|
||||
}
|
||||
|
||||
/**
|
||||
* On error, \a timeout_ns holds the remaining time.
|
||||
*/
|
||||
int
|
||||
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
|
||||
{
|
||||
struct drm_i915_gem_wait wait = {
|
||||
.bo_handle = gem_handle,
|
||||
.timeout_ns = *timeout_ns,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
|
||||
*timeout_ns = wait.timeout_ns;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_execbuffer(struct anv_device *device,
|
||||
struct drm_i915_gem_execbuffer2 *execbuf)
|
||||
{
|
||||
if (execbuf->flags & I915_EXEC_FENCE_OUT)
|
||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
|
||||
else
|
||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
|
||||
}
|
||||
|
||||
/** Return -1 on error. */
|
||||
int
|
||||
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
if (!device->info->has_tiling_uapi)
|
||||
return -1;
|
||||
|
||||
struct drm_i915_gem_get_tiling get_tiling = {
|
||||
.handle = gem_handle,
|
||||
};
|
||||
|
||||
/* FIXME: On discrete platforms we don't have DRM_IOCTL_I915_GEM_GET_TILING
|
||||
* anymore, so we will need another way to get the tiling. Apparently this
|
||||
* is only used in Android code, so we may need some other way to
|
||||
* communicate the tiling mode.
|
||||
*/
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) {
|
||||
assert(!"Failed to get BO tiling");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return get_tiling.tiling_mode;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_tiling(struct anv_device *device,
|
||||
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* On discrete platforms we don't have DRM_IOCTL_I915_GEM_SET_TILING. So
|
||||
* nothing needs to be done.
|
||||
*/
|
||||
if (!device->info->has_tiling_uapi)
|
||||
return 0;
|
||||
|
||||
/* set_tiling overwrites the input on the error path, so we have to open
|
||||
* code intel_ioctl.
|
||||
*/
|
||||
do {
|
||||
struct drm_i915_gem_set_tiling set_tiling = {
|
||||
.handle = gem_handle,
|
||||
.tiling_mode = tiling,
|
||||
.stride = stride,
|
||||
};
|
||||
|
||||
ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
|
||||
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_get_param(int fd, uint32_t param)
|
||||
{
|
||||
int tmp;
|
||||
|
||||
drm_i915_getparam_t gp = {
|
||||
.param = param,
|
||||
.value = &tmp,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
|
||||
if (ret == 0)
|
||||
return tmp;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
anv_gem_has_context_priority(int fd, int priority)
|
||||
{
|
||||
return !anv_gem_set_context_param(fd, 0, I915_CONTEXT_PARAM_PRIORITY,
|
||||
priority);
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_create_context(struct anv_device *device)
|
||||
{
|
||||
struct drm_i915_gem_context_create create = { 0 };
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
|
||||
if (ret == -1)
|
||||
return -1;
|
||||
|
||||
return create.ctx_id;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_destroy_context(struct anv_device *device, int context)
|
||||
{
|
||||
struct drm_i915_gem_context_destroy destroy = {
|
||||
.ctx_id = context,
|
||||
};
|
||||
|
||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
|
||||
{
|
||||
struct drm_i915_gem_context_param p = {
|
||||
.ctx_id = context,
|
||||
.param = param,
|
||||
.value = value,
|
||||
};
|
||||
int err = 0;
|
||||
|
||||
if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
|
||||
err = -errno;
|
||||
return err;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_context_get_reset_stats(int fd, int context,
|
||||
uint32_t *active, uint32_t *pending)
|
||||
{
|
||||
struct drm_i915_reset_stats stats = {
|
||||
.ctx_id = context,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
|
||||
if (ret == 0) {
|
||||
*active = stats.batch_active;
|
||||
*pending = stats.batch_pending;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
struct drm_prime_handle args = {
|
||||
.handle = gem_handle,
|
||||
.flags = DRM_CLOEXEC | DRM_RDWR,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
|
||||
if (ret == -1)
|
||||
return -1;
|
||||
|
||||
return args.fd;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_fd_to_handle(struct anv_device *device, int fd)
|
||||
{
|
||||
struct drm_prime_handle args = {
|
||||
.fd = fd,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
|
||||
if (ret == -1)
|
||||
return 0;
|
||||
|
||||
return args.handle;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
|
||||
{
|
||||
struct drm_i915_reg_read args = {
|
||||
.offset = offset
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(fd, DRM_IOCTL_I915_REG_READ, &args);
|
||||
|
||||
*result = args.val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct drm_i915_query_engine_info *
|
||||
anv_gem_get_engine_info(int fd)
|
||||
{
|
||||
return intel_i915_query_alloc(fd, DRM_I915_QUERY_ENGINE_INFO, NULL);
|
||||
}
|
187
src/intel/vulkan_hasvk/anv_gem_stubs.c
Normal file
187
src/intel/vulkan_hasvk/anv_gem_stubs.c
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "util/anon_file.h"
|
||||
#include "anv_private.h"
|
||||
|
||||
uint32_t
|
||||
anv_gem_create(struct anv_device *device, uint64_t size)
|
||||
{
|
||||
int fd = os_create_anonymous_file(size, "fake bo");
|
||||
if (fd == -1)
|
||||
return 0;
|
||||
|
||||
assert(fd != 0);
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
void
|
||||
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
close(gem_handle);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
||||
uint32_t flags, uint32_t num_regions,
|
||||
struct drm_i915_gem_memory_class_instance *regions)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void*
|
||||
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
|
||||
uint64_t offset, uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* Ignore flags, as they're specific to I915_GEM_MMAP. */
|
||||
(void) flags;
|
||||
|
||||
return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
gem_handle, offset);
|
||||
}
|
||||
|
||||
/* This is just a wrapper around munmap, but it also notifies valgrind that
|
||||
* this map is no longer valid. Pair this with anv_gem_mmap().
|
||||
*/
|
||||
void
|
||||
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
|
||||
{
|
||||
munmap(p, size);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
|
||||
{
|
||||
int fd = os_create_anonymous_file(size, "fake bo");
|
||||
if (fd == -1)
|
||||
return 0;
|
||||
|
||||
assert(fd != 0);
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_execbuffer(struct anv_device *device,
|
||||
struct drm_i915_gem_execbuffer2 *execbuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_tiling(struct anv_device *device,
|
||||
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
|
||||
uint32_t caching)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_get_param(int fd, uint32_t param)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_create_context(struct anv_device *device)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_destroy_context(struct anv_device *device, int context)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
bool
|
||||
anv_gem_has_context_priority(int fd, int priority)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_context_get_reset_stats(int fd, int context,
|
||||
uint32_t *active, uint32_t *pending)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_gem_fd_to_handle(struct anv_device *device, int fd)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_i915_query(int fd, uint64_t query_id, void *buffer,
|
||||
int32_t *buffer_len)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
struct drm_i915_query_engine_info *
|
||||
anv_gem_get_engine_info(int fd)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
||||
|
||||
int
|
||||
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
|
||||
{
|
||||
unreachable("Unused");
|
||||
}
|
180
src/intel/vulkan_hasvk/anv_genX.h
Normal file
180
src/intel/vulkan_hasvk/anv_genX.h
Normal file
@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: The header can be included multiple times, from the same file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Gen-specific function declarations. This header must *not* be included
|
||||
* directly. Instead, it is included multiple times by anv_private.h.
|
||||
*
|
||||
* In this header file, the usual genx() macro is available.
|
||||
*/
|
||||
|
||||
#ifndef ANV_PRIVATE_H
|
||||
#error This file is included by means other than anv_private.h
|
||||
#endif
|
||||
|
||||
struct intel_sample_positions;
|
||||
|
||||
typedef struct VkRenderingSelfDependencyInfoMESA VkRenderingSelfDependencyInfoMESA;
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_cullmode)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_front_face)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_primitive_type)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_compare_op)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_stencil_op)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_logic_op)[];
|
||||
|
||||
void genX(init_physical_device_state)(struct anv_physical_device *device);
|
||||
|
||||
VkResult genX(init_device_state)(struct anv_device *device);
|
||||
|
||||
void genX(init_cps_device_state)(struct anv_device *device);
|
||||
|
||||
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct isl_surf *surf);
|
||||
|
||||
void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
int vb_index,
|
||||
struct anv_address vb_address,
|
||||
uint32_t vb_size);
|
||||
void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint32_t access_type,
|
||||
uint64_t vb_used);
|
||||
|
||||
void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
|
||||
unsigned width, unsigned height,
|
||||
unsigned scale);
|
||||
|
||||
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
enum anv_pipe_bits
|
||||
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
uint32_t current_pipeline,
|
||||
enum anv_pipe_bits bits);
|
||||
|
||||
void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
|
||||
struct anv_device *device,
|
||||
struct anv_batch *batch);
|
||||
|
||||
void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state);
|
||||
|
||||
void genX(emit_so_memcpy)(struct anv_memcpy_state *state,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size);
|
||||
|
||||
void genX(emit_l3_config)(struct anv_batch *batch,
|
||||
const struct anv_device *device,
|
||||
const struct intel_l3_config *cfg);
|
||||
|
||||
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct intel_l3_config *cfg);
|
||||
|
||||
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
|
||||
bool enable);
|
||||
|
||||
void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_image *image,
|
||||
VkImageAspectFlagBits aspect,
|
||||
enum isl_aux_usage aux_usage,
|
||||
uint32_t level,
|
||||
uint32_t base_layer,
|
||||
uint32_t layer_count);
|
||||
|
||||
void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
struct anv_state genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void
|
||||
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||
const struct intel_l3_config *l3_config,
|
||||
VkShaderStageFlags active_stages,
|
||||
const unsigned entry_size[4],
|
||||
enum intel_urb_deref_block_size *deref_block_size);
|
||||
|
||||
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
|
||||
const struct vk_sample_locations_state *sl);
|
||||
|
||||
void genX(emit_sample_pattern)(struct anv_batch *batch,
|
||||
const struct vk_sample_locations_state *sl);
|
||||
|
||||
void genX(emit_shading_rate)(struct anv_batch *batch,
|
||||
const struct anv_graphics_pipeline *pipeline,
|
||||
const struct vk_fragment_shading_rate_state *fsr);
|
||||
|
||||
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size);
|
||||
|
||||
void genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params);
|
||||
|
||||
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address addr,
|
||||
bool end_of_pipe);
|
||||
|
||||
void
|
||||
genX(rasterization_mode)(VkPolygonMode raster_mode,
|
||||
VkLineRasterizationModeEXT line_mode,
|
||||
float line_width,
|
||||
uint32_t *api_mode,
|
||||
bool *msaa_rasterization_enable);
|
||||
|
||||
uint32_t
|
||||
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPolygonMode raster_mode);
|
||||
|
||||
VkPolygonMode
|
||||
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPrimitiveTopology primitive_topology);
|
||||
|
||||
void
|
||||
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
||||
const struct vk_graphics_pipeline_state *state);
|
||||
|
||||
void
|
||||
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);
|
||||
|
||||
void
|
||||
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
|
2973
src/intel/vulkan_hasvk/anv_image.c
Normal file
2973
src/intel/vulkan_hasvk/anv_image.c
Normal file
File diff suppressed because it is too large
Load Diff
516
src/intel/vulkan_hasvk/anv_measure.c
Normal file
516
src/intel/vulkan_hasvk/anv_measure.c
Normal file
@ -0,0 +1,516 @@
|
||||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_measure.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "common/intel_measure.h"
|
||||
#include "util/debug.h"
|
||||
|
||||
struct anv_measure_batch {
|
||||
struct anv_bo *bo;
|
||||
struct intel_measure_batch base;
|
||||
};
|
||||
|
||||
void
|
||||
anv_measure_device_init(struct anv_physical_device *device)
|
||||
{
|
||||
switch (device->info.verx10) {
|
||||
case 125:
|
||||
device->cmd_emit_timestamp = &gfx125_cmd_emit_timestamp;
|
||||
break;
|
||||
case 120:
|
||||
device->cmd_emit_timestamp = &gfx12_cmd_emit_timestamp;
|
||||
break;
|
||||
case 110:
|
||||
device->cmd_emit_timestamp = &gfx11_cmd_emit_timestamp;
|
||||
break;
|
||||
case 90:
|
||||
device->cmd_emit_timestamp = &gfx9_cmd_emit_timestamp;
|
||||
break;
|
||||
case 80:
|
||||
device->cmd_emit_timestamp = &gfx8_cmd_emit_timestamp;
|
||||
break;
|
||||
case 75:
|
||||
device->cmd_emit_timestamp = &gfx75_cmd_emit_timestamp;
|
||||
break;
|
||||
case 70:
|
||||
device->cmd_emit_timestamp = &gfx7_cmd_emit_timestamp;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
/* initialise list of measure structures that await rendering */
|
||||
struct intel_measure_device *measure_device = &device->measure_device;
|
||||
intel_measure_init(measure_device);
|
||||
struct intel_measure_config *config = measure_device->config;
|
||||
if (config == NULL)
|
||||
return;
|
||||
|
||||
/* the final member of intel_measure_ringbuffer is a zero-length array of
|
||||
* intel_measure_buffered_result objects. Allocate additional space for
|
||||
* the buffered objects based on the run-time configurable buffer_size
|
||||
*/
|
||||
const size_t rb_bytes = sizeof(struct intel_measure_ringbuffer) +
|
||||
config->buffer_size * sizeof(struct intel_measure_buffered_result);
|
||||
struct intel_measure_ringbuffer * rb =
|
||||
vk_zalloc(&device->instance->vk.alloc,
|
||||
rb_bytes, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
measure_device->ringbuffer = rb;
|
||||
}
|
||||
|
||||
static struct intel_measure_config*
|
||||
config_from_command_buffer(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
return cmd_buffer->device->physical->measure_device.config;
|
||||
}
|
||||
|
||||
void
|
||||
anv_measure_init(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
|
||||
if (!config || !config->enabled) {
|
||||
cmd_buffer->measure = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
/* the final member of anv_measure is a zero-length array of
|
||||
* intel_measure_snapshot objects. Create additional space for the
|
||||
* snapshot objects based on the run-time configurable batch_size
|
||||
*/
|
||||
const size_t batch_bytes = sizeof(struct anv_measure_batch) +
|
||||
config->batch_size * sizeof(struct intel_measure_snapshot);
|
||||
struct anv_measure_batch * measure =
|
||||
vk_alloc(&cmd_buffer->vk.pool->alloc,
|
||||
batch_bytes, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
|
||||
memset(measure, 0, batch_bytes);
|
||||
ASSERTED VkResult result =
|
||||
anv_device_alloc_bo(device, "measure data",
|
||||
config->batch_size * sizeof(uint64_t),
|
||||
ANV_BO_ALLOC_MAPPED,
|
||||
0,
|
||||
(struct anv_bo**)&measure->bo);
|
||||
measure->base.timestamps = measure->bo->map;
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
cmd_buffer->measure = measure;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum intel_measure_snapshot_type type,
|
||||
const char *event_name,
|
||||
uint32_t count)
|
||||
{
|
||||
struct anv_batch *batch = &cmd_buffer->batch;
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
struct anv_physical_device *device = cmd_buffer->device->physical;
|
||||
struct intel_measure_device *measure_device = &device->measure_device;
|
||||
|
||||
const unsigned device_frame = measure_device->frame;
|
||||
|
||||
/* if the command buffer is not associated with a frame, associate it with
|
||||
* the most recent acquired frame
|
||||
*/
|
||||
if (measure->base.frame == 0)
|
||||
measure->base.frame = device_frame;
|
||||
|
||||
// uintptr_t framebuffer = (uintptr_t)cmd_buffer->state.framebuffer;
|
||||
//
|
||||
// if (!measure->base.framebuffer &&
|
||||
// cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
|
||||
// /* secondary command buffer inherited the framebuffer from the primary */
|
||||
// measure->base.framebuffer = framebuffer;
|
||||
//
|
||||
// /* verify framebuffer has been properly tracked */
|
||||
// assert(type == INTEL_SNAPSHOT_END ||
|
||||
// framebuffer == measure->base.framebuffer ||
|
||||
// framebuffer == 0 ); /* compute has no framebuffer */
|
||||
|
||||
unsigned index = measure->base.index++;
|
||||
|
||||
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
|
||||
(struct anv_address) {
|
||||
.bo = measure->bo,
|
||||
.offset = index * sizeof(uint64_t) },
|
||||
true /* end_of_pipe */);
|
||||
|
||||
if (event_name == NULL)
|
||||
event_name = intel_measure_snapshot_string(type);
|
||||
|
||||
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
|
||||
memset(snapshot, 0, sizeof(*snapshot));
|
||||
snapshot->type = type;
|
||||
snapshot->count = (unsigned) count;
|
||||
snapshot->event_count = measure->base.event_count;
|
||||
snapshot->event_name = event_name;
|
||||
// snapshot->framebuffer = framebuffer;
|
||||
|
||||
if (type == INTEL_SNAPSHOT_COMPUTE && cmd_buffer->state.compute.pipeline) {
|
||||
snapshot->cs = (uintptr_t) cmd_buffer->state.compute.pipeline->cs;
|
||||
} else if (cmd_buffer->state.gfx.pipeline) {
|
||||
const struct anv_graphics_pipeline *pipeline =
|
||||
cmd_buffer->state.gfx.pipeline;
|
||||
snapshot->vs = (uintptr_t) pipeline->shaders[MESA_SHADER_VERTEX];
|
||||
snapshot->tcs = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_CTRL];
|
||||
snapshot->tes = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_EVAL];
|
||||
snapshot->gs = (uintptr_t) pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
snapshot->fs = (uintptr_t) pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
anv_measure_end_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint32_t event_count)
|
||||
{
|
||||
struct anv_batch *batch = &cmd_buffer->batch;
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
struct anv_physical_device *device = cmd_buffer->device->physical;
|
||||
|
||||
unsigned index = measure->base.index++;
|
||||
assert(index % 2 == 1);
|
||||
|
||||
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
|
||||
(struct anv_address) {
|
||||
.bo = measure->bo,
|
||||
.offset = index * sizeof(uint64_t) },
|
||||
true /* end_of_pipe */);
|
||||
|
||||
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
|
||||
memset(snapshot, 0, sizeof(*snapshot));
|
||||
snapshot->type = INTEL_SNAPSHOT_END;
|
||||
snapshot->event_count = event_count;
|
||||
}
|
||||
|
||||
static bool
|
||||
state_changed(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum intel_measure_snapshot_type type)
|
||||
{
|
||||
uintptr_t vs=0, tcs=0, tes=0, gs=0, fs=0, cs=0;
|
||||
|
||||
if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)
|
||||
/* can't record timestamps in this mode */
|
||||
return false;
|
||||
|
||||
if (type == INTEL_SNAPSHOT_COMPUTE) {
|
||||
const struct anv_compute_pipeline *cs_pipe =
|
||||
cmd_buffer->state.compute.pipeline;
|
||||
assert(cs_pipe);
|
||||
cs = (uintptr_t)cs_pipe->cs;
|
||||
} else if (type == INTEL_SNAPSHOT_DRAW) {
|
||||
const struct anv_graphics_pipeline *gfx = cmd_buffer->state.gfx.pipeline;
|
||||
assert(gfx);
|
||||
vs = (uintptr_t) gfx->shaders[MESA_SHADER_VERTEX];
|
||||
tcs = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_CTRL];
|
||||
tes = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_EVAL];
|
||||
gs = (uintptr_t) gfx->shaders[MESA_SHADER_GEOMETRY];
|
||||
fs = (uintptr_t) gfx->shaders[MESA_SHADER_FRAGMENT];
|
||||
}
|
||||
/* else blorp, all programs NULL */
|
||||
|
||||
return intel_measure_state_changed(&cmd_buffer->measure->base,
|
||||
vs, tcs, tes, gs, fs, cs);
|
||||
}
|
||||
|
||||
void
|
||||
_anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum intel_measure_snapshot_type type,
|
||||
const char *event_name,
|
||||
uint32_t count)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
|
||||
assert(config);
|
||||
if (measure == NULL)
|
||||
return;
|
||||
|
||||
assert(type != INTEL_SNAPSHOT_END);
|
||||
if (!state_changed(cmd_buffer, type)) {
|
||||
/* filter out this event */
|
||||
return;
|
||||
}
|
||||
|
||||
/* increment event count */
|
||||
++measure->base.event_count;
|
||||
if (measure->base.event_count == 1 ||
|
||||
measure->base.event_count == config->event_interval + 1) {
|
||||
/* the first event of an interval */
|
||||
|
||||
if (measure->base.index % 2) {
|
||||
/* end the previous event */
|
||||
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count - 1);
|
||||
}
|
||||
measure->base.event_count = 1;
|
||||
|
||||
if (measure->base.index == config->batch_size) {
|
||||
/* Snapshot buffer is full. The batch must be flushed before
|
||||
* additional snapshots can be taken.
|
||||
*/
|
||||
static bool warned = false;
|
||||
if (unlikely(!warned)) {
|
||||
fprintf(config->file,
|
||||
"WARNING: batch size exceeds INTEL_MEASURE limit: %d. "
|
||||
"Data has been dropped. "
|
||||
"Increase setting with INTEL_MEASURE=batch_size={count}\n",
|
||||
config->batch_size);
|
||||
}
|
||||
|
||||
warned = true;
|
||||
return;
|
||||
}
|
||||
|
||||
anv_measure_start_snapshot(cmd_buffer, type, event_name, count);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a command buffer is reset. Re-initializes existing anv_measure
|
||||
* data structures.
|
||||
*/
|
||||
void
|
||||
anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
|
||||
if (!config->enabled) {
|
||||
cmd_buffer->measure = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!measure) {
|
||||
/* Capture has recently been enabled. Instead of resetting, a new data
|
||||
* structure must be allocated and initialized.
|
||||
*/
|
||||
return anv_measure_init(cmd_buffer);
|
||||
}
|
||||
|
||||
/* it is possible that the command buffer contains snapshots that have not
|
||||
* yet been processed
|
||||
*/
|
||||
intel_measure_gather(&device->physical->measure_device,
|
||||
device->info);
|
||||
|
||||
assert(cmd_buffer->device != NULL);
|
||||
|
||||
measure->base.index = 0;
|
||||
// measure->base.framebuffer = 0;
|
||||
measure->base.frame = 0;
|
||||
measure->base.event_count = 0;
|
||||
list_inithead(&measure->base.link);
|
||||
}
|
||||
|
||||
void
|
||||
anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_physical_device *physical = device->physical;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
if (measure == NULL)
|
||||
return;
|
||||
|
||||
/* it is possible that the command buffer contains snapshots that have not
|
||||
* yet been processed
|
||||
*/
|
||||
intel_measure_gather(&physical->measure_device, &physical->info);
|
||||
|
||||
anv_device_release_bo(device, measure->bo);
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, measure);
|
||||
cmd_buffer->measure = NULL;
|
||||
}
|
||||
|
||||
static struct intel_measure_config*
|
||||
config_from_device(struct anv_device *device)
|
||||
{
|
||||
return device->physical->measure_device.config;
|
||||
}
|
||||
|
||||
void
|
||||
anv_measure_device_destroy(struct anv_physical_device *device)
|
||||
{
|
||||
struct intel_measure_device *measure_device = &device->measure_device;
|
||||
struct intel_measure_config *config = measure_device->config;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
|
||||
if (measure_device->ringbuffer != NULL) {
|
||||
vk_free(&device->instance->vk.alloc, measure_device->ringbuffer);
|
||||
measure_device->ringbuffer = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for command buffer submission.
|
||||
*/
|
||||
void
|
||||
_anv_measure_submit(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
struct intel_measure_device *measure_device = &cmd_buffer->device->physical->measure_device;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
if (measure == NULL)
|
||||
return;
|
||||
|
||||
struct intel_measure_batch *base = &measure->base;
|
||||
if (base->index == 0)
|
||||
/* no snapshots were started */
|
||||
return;
|
||||
|
||||
/* finalize snapshots and enqueue them */
|
||||
static unsigned cmd_buffer_count = 0;
|
||||
base->batch_count = p_atomic_inc_return(&cmd_buffer_count);
|
||||
|
||||
if (base->index %2 == 1) {
|
||||
anv_measure_end_snapshot(cmd_buffer, base->event_count);
|
||||
base->event_count = 0;
|
||||
}
|
||||
|
||||
/* Mark the final timestamp as 'not completed'. This marker will be used
|
||||
* to verify that rendering is complete.
|
||||
*/
|
||||
base->timestamps[base->index - 1] = 0;
|
||||
|
||||
/* add to the list of submitted snapshots */
|
||||
pthread_mutex_lock(&measure_device->mutex);
|
||||
list_addtail(&measure->base.link, &measure_device->queued_snapshots);
|
||||
pthread_mutex_unlock(&measure_device->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for the start of a frame.
|
||||
*/
|
||||
void
|
||||
_anv_measure_acquire(struct anv_device *device)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_device(device);
|
||||
struct intel_measure_device *measure_device = &device->physical->measure_device;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
if (measure_device == NULL)
|
||||
return;
|
||||
|
||||
intel_measure_frame_transition(p_atomic_inc_return(&measure_device->frame));
|
||||
|
||||
/* iterate the queued snapshots and publish those that finished */
|
||||
intel_measure_gather(measure_device, &device->physical->info);
|
||||
}
|
||||
|
||||
void
|
||||
_anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
if (measure == NULL)
|
||||
return;
|
||||
if (measure->base.index % 2 == 0)
|
||||
return;
|
||||
|
||||
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count);
|
||||
measure->base.event_count = 0;
|
||||
}
|
||||
|
||||
void
|
||||
_anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
|
||||
struct anv_measure_batch *measure = cmd_buffer->measure;
|
||||
|
||||
if (!config)
|
||||
return;
|
||||
if (measure == NULL)
|
||||
return;
|
||||
|
||||
// if (measure->base.framebuffer == (uintptr_t) cmd_buffer->state.framebuffer)
|
||||
// /* no change */
|
||||
// return;
|
||||
|
||||
bool filtering = (config->flags & (INTEL_MEASURE_RENDERPASS |
|
||||
INTEL_MEASURE_SHADER));
|
||||
if (filtering && measure->base.index % 2 == 1) {
|
||||
/* snapshot for previous renderpass was not ended */
|
||||
anv_measure_end_snapshot(cmd_buffer,
|
||||
measure->base.event_count);
|
||||
measure->base.event_count = 0;
|
||||
}
|
||||
|
||||
// measure->base.framebuffer = (uintptr_t) cmd_buffer->state.framebuffer;
|
||||
}
|
||||
|
||||
void
|
||||
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
|
||||
struct anv_cmd_buffer *secondary)
|
||||
{
|
||||
struct intel_measure_config *config = config_from_command_buffer(primary);
|
||||
struct anv_measure_batch *measure = primary->measure;
|
||||
if (!config)
|
||||
return;
|
||||
if (measure == NULL)
|
||||
return;
|
||||
if (config->flags & (INTEL_MEASURE_BATCH | INTEL_MEASURE_FRAME))
|
||||
/* secondary timing will be contained within the primary */
|
||||
return;
|
||||
if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
|
||||
static bool warned = false;
|
||||
if (unlikely(!warned)) {
|
||||
fprintf(config->file,
|
||||
"WARNING: INTEL_MEASURE cannot capture timings of commands "
|
||||
"in secondary command buffers with "
|
||||
"VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT set.\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (measure->base.index % 2 == 1)
|
||||
anv_measure_end_snapshot(primary, measure->base.event_count);
|
||||
|
||||
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[measure->base.index]);
|
||||
_anv_measure_snapshot(primary, INTEL_SNAPSHOT_SECONDARY_BATCH, NULL, 0);
|
||||
|
||||
snapshot->secondary = &secondary->measure->base;
|
||||
}
|
82
src/intel/vulkan_hasvk/anv_measure.h
Normal file
82
src/intel/vulkan_hasvk/anv_measure.h
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ANV_MEASURE_H
|
||||
#define ANV_MEASURE_H
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "common/intel_measure.h"
|
||||
|
||||
void anv_measure_device_init(struct anv_physical_device *device);
|
||||
void anv_measure_device_destroy(struct anv_physical_device *device);
|
||||
|
||||
void anv_measure_init(struct anv_cmd_buffer *cmd_buffer);
|
||||
void anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer);
|
||||
void anv_measure_reset(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void _anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum intel_measure_snapshot_type type,
|
||||
const char *event_name,
|
||||
uint32_t count);
|
||||
|
||||
/* ends snapshots before command buffer submission */
|
||||
void _anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
/* when measuring render passes, inserts a timestamp */
|
||||
void _anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
/* tracks frame progression */
|
||||
void _anv_measure_acquire(struct anv_device *device);
|
||||
|
||||
/* should be combined with endcommandbuffer */
|
||||
void _anv_measure_submit(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void
|
||||
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
|
||||
struct anv_cmd_buffer *secondary);
|
||||
|
||||
#define anv_measure_acquire(device) \
|
||||
if (unlikely(device->physical->measure_device.config)) \
|
||||
_anv_measure_acquire(device)
|
||||
|
||||
#define anv_measure_snapshot(cmd_buffer, type, event_name, count) \
|
||||
if (unlikely(cmd_buffer->measure)) \
|
||||
_anv_measure_snapshot(cmd_buffer, type, event_name, count)
|
||||
|
||||
#define anv_measure_endcommandbuffer(cmd_buffer) \
|
||||
if (unlikely(cmd_buffer->measure)) \
|
||||
_anv_measure_endcommandbuffer(cmd_buffer)
|
||||
|
||||
#define anv_measure_beginrenderpass(cmd_buffer) \
|
||||
if (unlikely(cmd_buffer->measure)) \
|
||||
_anv_measure_beginrenderpass(cmd_buffer)
|
||||
|
||||
#define anv_measure_submit(cmd_buffer) \
|
||||
if (unlikely(cmd_buffer->measure)) \
|
||||
_anv_measure_submit(cmd_buffer)
|
||||
|
||||
#define anv_measure_add_secondary(primary, secondary) \
|
||||
if (unlikely(primary->measure)) \
|
||||
_anv_measure_add_secondary(primary, secondary)
|
||||
|
||||
#endif /* ANV_MEASURE_H */
|
97
src/intel/vulkan_hasvk/anv_nir.h
Normal file
97
src/intel/vulkan_hasvk/anv_nir.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ANV_NIR_H
|
||||
#define ANV_NIR_H
|
||||
|
||||
#include "nir/nir.h"
|
||||
#include "anv_private.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
bool anv_check_for_primitive_replication(struct anv_device *device,
|
||||
VkShaderStageFlags stages,
|
||||
nir_shader **shaders,
|
||||
uint32_t view_mask);
|
||||
|
||||
bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
|
||||
bool use_primitive_replication);
|
||||
|
||||
bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
|
||||
const struct anv_pipeline_layout *layout);
|
||||
|
||||
static inline nir_address_format
|
||||
anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access)
|
||||
{
|
||||
if (pdevice->has_a64_buffer_access) {
|
||||
if (robust_buffer_access)
|
||||
return nir_address_format_64bit_bounded_global;
|
||||
else
|
||||
return nir_address_format_64bit_global_32bit_offset;
|
||||
} else {
|
||||
return nir_address_format_32bit_index_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_address_format
|
||||
anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access)
|
||||
{
|
||||
if (pdevice->has_a64_buffer_access) {
|
||||
if (robust_buffer_access)
|
||||
return nir_address_format_64bit_bounded_global;
|
||||
else
|
||||
return nir_address_format_64bit_global_32bit_offset;
|
||||
} else {
|
||||
return nir_address_format_32bit_index_offset;
|
||||
}
|
||||
}
|
||||
|
||||
bool anv_nir_lower_ubo_loads(nir_shader *shader);
|
||||
|
||||
void anv_nir_apply_pipeline_layout(nir_shader *shader,
|
||||
const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
const struct anv_pipeline_layout *layout,
|
||||
struct anv_pipeline_bind_map *map);
|
||||
|
||||
void anv_nir_compute_push_layout(nir_shader *nir,
|
||||
const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
void *mem_ctx);
|
||||
|
||||
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map);
|
||||
|
||||
bool anv_nir_add_base_work_group_id(nir_shader *shader);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ANV_NIR_H */
|
63
src/intel/vulkan_hasvk/anv_nir_add_base_work_group_id.c
Normal file
63
src/intel/vulkan_hasvk/anv_nir_add_base_work_group_id.c
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
#include "compiler/brw_compiler.h"
|
||||
|
||||
static bool
|
||||
anv_nir_add_base_work_group_id_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
UNUSED void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *load_id = nir_instr_as_intrinsic(instr);
|
||||
if (load_id->intrinsic != nir_intrinsic_load_workgroup_id)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(&load_id->instr);
|
||||
|
||||
nir_ssa_def *load_base =
|
||||
nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
|
||||
.base = offsetof(struct anv_push_constants, cs.base_work_group_id),
|
||||
.range = 3 * sizeof(uint32_t));
|
||||
|
||||
nir_ssa_def *id = nir_iadd(b, &load_id->dest.ssa, load_base);
|
||||
|
||||
nir_ssa_def_rewrite_uses_after(&load_id->dest.ssa, id, id->parent_instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_add_base_work_group_id(nir_shader *shader)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_COMPUTE);
|
||||
|
||||
return nir_shader_instructions_pass(shader,
|
||||
anv_nir_add_base_work_group_id_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
1686
src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c
Normal file
1686
src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c
Normal file
File diff suppressed because it is too large
Load Diff
290
src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c
Normal file
290
src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c
Normal file
@ -0,0 +1,290 @@
|
||||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "compiler/brw_nir.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
|
||||
#define sizeof_field(type, field) sizeof(((type *)0)->field)
|
||||
|
||||
void
|
||||
anv_nir_compute_push_layout(nir_shader *nir,
|
||||
const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
void *mem_ctx)
|
||||
{
|
||||
const struct brw_compiler *compiler = pdevice->compiler;
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
memset(map->push_ranges, 0, sizeof(map->push_ranges));
|
||||
|
||||
bool has_const_ubo = false;
|
||||
unsigned push_start = UINT_MAX, push_end = 0;
|
||||
nir_foreach_function(function, nir) {
|
||||
if (!function->impl)
|
||||
continue;
|
||||
|
||||
nir_foreach_block(block, function->impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
if (nir_src_is_const(intrin->src[0]) &&
|
||||
nir_src_is_const(intrin->src[1]))
|
||||
has_const_ubo = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
unsigned base = nir_intrinsic_base(intrin);
|
||||
unsigned range = nir_intrinsic_range(intrin);
|
||||
push_start = MIN2(push_start, base);
|
||||
push_end = MAX2(push_end, base + range);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_desc_set_address_intel:
|
||||
push_start = MIN2(push_start,
|
||||
offsetof(struct anv_push_constants, desc_sets));
|
||||
push_end = MAX2(push_end, push_start +
|
||||
sizeof_field(struct anv_push_constants, desc_sets));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const bool has_push_intrinsic = push_start <= push_end;
|
||||
|
||||
const bool push_ubo_ranges =
|
||||
pdevice->info.verx10 >= 75 &&
|
||||
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
|
||||
!brw_shader_stage_requires_bindless_resources(nir->info.stage);
|
||||
|
||||
if (push_ubo_ranges && robust_buffer_access) {
|
||||
/* We can't on-the-fly adjust our push ranges because doing so would
|
||||
* mess up the layout in the shader. When robustBufferAccess is
|
||||
* enabled, we push a mask into the shader indicating which pushed
|
||||
* registers are valid and we zero out the invalid ones at the top of
|
||||
* the shader.
|
||||
*/
|
||||
const uint32_t push_reg_mask_start =
|
||||
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
|
||||
const uint32_t push_reg_mask_end = push_reg_mask_start + sizeof(uint64_t);
|
||||
push_start = MIN2(push_start, push_reg_mask_start);
|
||||
push_end = MAX2(push_end, push_reg_mask_end);
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
|
||||
/* For compute shaders, we always have to have the subgroup ID. The
|
||||
* back-end compiler will "helpfully" add it for us in the last push
|
||||
* constant slot. Yes, there is an off-by-one error here but that's
|
||||
* because the back-end will add it so we want to claim the number of
|
||||
* push constants one dword less than the full amount including
|
||||
* gl_SubgroupId.
|
||||
*/
|
||||
assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
|
||||
push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
|
||||
}
|
||||
|
||||
/* Align push_start down to a 32B boundary and make it no larger than
|
||||
* push_end (no push constants is indicated by push_start = UINT_MAX).
|
||||
*/
|
||||
push_start = MIN2(push_start, push_end);
|
||||
push_start = align_down_u32(push_start, 32);
|
||||
|
||||
/* For vec4 our push data size needs to be aligned to a vec4 and for
|
||||
* scalar, it needs to be aligned to a DWORD.
|
||||
*/
|
||||
const unsigned align = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
|
||||
nir->num_uniforms = ALIGN(push_end - push_start, align);
|
||||
prog_data->nr_params = nir->num_uniforms / 4;
|
||||
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
|
||||
|
||||
struct anv_push_range push_constant_range = {
|
||||
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
|
||||
.start = push_start / 32,
|
||||
.length = DIV_ROUND_UP(push_end - push_start, 32),
|
||||
};
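/* Illustrative numbers (not from the original source): if a shader reads
 * push constants covering byte offsets [36, 144), push_start is aligned
 * down to 32, so the range above gets .start = 32 / 32 = 1 and
 * .length = DIV_ROUND_UP(144 - 32, 32) = 4 registers of 32 bytes each.
 */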
|
||||
|
||||
if (has_push_intrinsic) {
|
||||
nir_foreach_function(function, nir) {
|
||||
if (!function->impl)
|
||||
continue;
|
||||
|
||||
nir_builder build, *b = &build;
|
||||
nir_builder_init(b, function->impl);
|
||||
|
||||
nir_foreach_block(block, function->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
/* With bindless shaders we load uniforms with SEND
|
||||
* messages. All the push constants are located after the
|
||||
* RT_DISPATCH_GLOBALS. We just need to add the offset to
|
||||
* the address right after RT_DISPATCH_GLOBALS (see
|
||||
* brw_nir_lower_rt_intrinsics.c).
|
||||
*/
|
||||
unsigned base_offset =
|
||||
brw_shader_stage_requires_bindless_resources(nir->info.stage) ? 0 : push_start;
|
||||
intrin->intrinsic = nir_intrinsic_load_uniform;
|
||||
nir_intrinsic_set_base(intrin,
|
||||
nir_intrinsic_base(intrin) -
|
||||
base_offset);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_desc_set_address_intel: {
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
|
||||
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
|
||||
.base = offsetof(struct anv_push_constants, desc_sets),
|
||||
.range = sizeof_field(struct anv_push_constants, desc_sets),
|
||||
.dest_type = nir_type_uint64);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (push_ubo_ranges) {
|
||||
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
||||
|
||||
/* The vec4 back-end pushes at most 32 regs while the scalar back-end
|
||||
* pushes up to 64. This is primarily because the scalar back-end has a
|
||||
* massively more competent register allocator and so the risk of
|
||||
* spilling due to UBO pushing isn't nearly as high.
|
||||
*/
|
||||
const unsigned max_push_regs =
|
||||
compiler->scalar_stage[nir->info.stage] ? 64 : 32;
|
||||
|
||||
unsigned total_push_regs = push_constant_range.length;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (total_push_regs + prog_data->ubo_ranges[i].length > max_push_regs)
|
||||
prog_data->ubo_ranges[i].length = max_push_regs - total_push_regs;
|
||||
total_push_regs += prog_data->ubo_ranges[i].length;
|
||||
}
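/* Illustrative example (values assumed, not from the source): with
 * push_constant_range.length == 2 and UBO range lengths {40, 30, 10, 0}
 * on the scalar back-end (max_push_regs == 64), the loop above keeps the
 * first range at 40, clamps the second to 64 - 42 = 22 and zeroes the
 * third, leaving total_push_regs exactly at 64.
 */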
|
||||
assert(total_push_regs <= max_push_regs);
|
||||
|
||||
int n = 0;
|
||||
|
||||
if (push_constant_range.length > 0)
|
||||
map->push_ranges[n++] = push_constant_range;
|
||||
|
||||
if (robust_buffer_access) {
|
||||
const uint32_t push_reg_mask_offset =
|
||||
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
|
||||
assert(push_reg_mask_offset >= push_start);
|
||||
prog_data->push_reg_mask_param =
|
||||
(push_reg_mask_offset - push_start) / 4;
|
||||
}
|
||||
|
||||
unsigned range_start_reg = push_constant_range.length;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
|
||||
if (ubo_range->length == 0)
|
||||
continue;
|
||||
|
||||
if (n >= 4 || (n == 3 && compiler->constant_buffer_0_is_relative)) {
|
||||
memset(ubo_range, 0, sizeof(*ubo_range));
|
||||
continue;
|
||||
}
|
||||
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&map->surface_to_descriptor[ubo_range->block];
|
||||
|
||||
map->push_ranges[n++] = (struct anv_push_range) {
|
||||
.set = binding->set,
|
||||
.index = binding->index,
|
||||
.dynamic_offset_index = binding->dynamic_offset_index,
|
||||
.start = ubo_range->start,
|
||||
.length = ubo_range->length,
|
||||
};
|
||||
|
||||
/* We only bother to shader-zero pushed client UBOs */
|
||||
if (binding->set < MAX_SETS && robust_buffer_access) {
|
||||
prog_data->zero_push_reg |= BITFIELD64_RANGE(range_start_reg,
|
||||
ubo_range->length);
|
||||
}
|
||||
|
||||
range_start_reg += ubo_range->length;
|
||||
}
|
||||
} else {
|
||||
/* For Ivy Bridge, the push constants packets have a different
|
||||
* rule that would require us to iterate in the other direction
|
||||
* and possibly mess around with dynamic state base address.
|
||||
* Don't bother; just emit regular push constants at n = 0.
|
||||
*
|
||||
* In the compute case, we don't have multiple push ranges so it's
|
||||
* better to just provide one in push_ranges[0].
|
||||
*/
|
||||
map->push_ranges[0] = push_constant_range;
|
||||
}
|
||||
|
||||
/* Now that we're done computing the push constant portion of the
|
||||
* bind map, hash it. This lets us quickly determine if the actual
|
||||
* mapping has changed and not just a no-op pipeline change.
|
||||
*/
|
||||
_mesa_sha1_compute(map->push_ranges,
|
||||
sizeof(map->push_ranges),
|
||||
map->push_sha1);
|
||||
}
|
||||
|
||||
void
|
||||
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
prog_data_push_size += prog_data->ubo_ranges[i].length;
|
||||
|
||||
unsigned bind_map_push_size = 0;
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
bind_map_push_size += map->push_ranges[i].length;
|
||||
|
||||
/* We could go through everything again but it should be enough to assert
|
||||
* that they push the same number of registers. This should alert us if
|
||||
* the back-end compiler decides to re-arrange stuff or shrink a range.
|
||||
*/
|
||||
assert(prog_data_push_size == bind_map_push_size);
|
||||
#endif
|
||||
}
|
324
src/intel/vulkan_hasvk/anv_nir_lower_multiview.c
Normal file
@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
#include "util/debug.h"
|
||||
|
||||
/**
|
||||
* This file implements the lowering required for VK_KHR_multiview.
|
||||
*
|
||||
* When possible, Primitive Replication is used and the shader is modified to
|
||||
* make gl_Position an array and fill it with values for each view.
|
||||
*
|
||||
* Otherwise we implement multiview using instanced rendering. The number of
|
||||
* instances in each draw call is multiplied by the number of views in the
|
||||
* subpass. Then, in the shader, we divide gl_InstanceId by the number of
|
||||
* views and use gl_InstanceId % view_count to compute the actual ViewIndex.
|
||||
*/
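/* Worked example of the instancing scheme (illustrative): with a view mask
 * covering 2 views, a draw of 3 instances is submitted to the hardware as
 * 6 instances; hardware instance i then resolves to application instance
 * i / 2 and compacted view index i % 2.
 */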
|
||||
|
||||
struct lower_multiview_state {
|
||||
nir_builder builder;
|
||||
|
||||
uint32_t view_mask;
|
||||
|
||||
nir_ssa_def *instance_id;
|
||||
nir_ssa_def *view_index;
|
||||
};
|
||||
|
||||
static nir_ssa_def *
|
||||
build_instance_id(struct lower_multiview_state *state)
|
||||
{
|
||||
assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
if (state->instance_id == NULL) {
|
||||
nir_builder *b = &state->builder;
|
||||
|
||||
b->cursor = nir_before_block(nir_start_block(b->impl));
|
||||
|
||||
/* We use instancing for implementing multiview. The actual instance id
|
||||
* is given by dividing instance_id by the number of views in this
|
||||
* subpass.
|
||||
*/
|
||||
state->instance_id =
|
||||
nir_idiv(b, nir_load_instance_id(b),
|
||||
nir_imm_int(b, util_bitcount(state->view_mask)));
|
||||
}
|
||||
|
||||
return state->instance_id;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
build_view_index(struct lower_multiview_state *state)
|
||||
{
|
||||
assert(state->builder.shader->info.stage != MESA_SHADER_FRAGMENT);
|
||||
|
||||
if (state->view_index == NULL) {
|
||||
nir_builder *b = &state->builder;
|
||||
|
||||
b->cursor = nir_before_block(nir_start_block(b->impl));
|
||||
|
||||
assert(state->view_mask != 0);
|
||||
if (util_bitcount(state->view_mask) == 1) {
|
||||
/* Set the view index directly. */
|
||||
state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
|
||||
} else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
|
||||
/* We only support 16 viewports */
|
||||
assert((state->view_mask & 0xffff0000) == 0);
|
||||
|
||||
/* We use instancing for implementing multiview. The compacted view
|
||||
* id is given by instance_id % view_count. We then have to convert
|
||||
* that to an actual view id.
|
||||
*/
|
||||
nir_ssa_def *compacted =
|
||||
nir_umod(b, nir_load_instance_id(b),
|
||||
nir_imm_int(b, util_bitcount(state->view_mask)));
|
||||
|
||||
if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
|
||||
/* If we have a full view mask, then compacted is what we want */
|
||||
state->view_index = compacted;
|
||||
} else {
|
||||
/* Now we define a map from compacted view index to the actual
|
||||
* view index that's based on the view_mask. The map is given by
|
||||
* 16 nibbles, each of which is a value from 0 to 15.
|
||||
*/
|
||||
uint64_t remap = 0;
|
||||
uint32_t i = 0;
|
||||
u_foreach_bit(bit, state->view_mask) {
|
||||
assert(bit < 16);
|
||||
remap |= (uint64_t)bit << (i++ * 4);
|
||||
}
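/* Worked example (illustrative): with view_mask = 0b1010 the loop above
 * visits bits 1 and 3 and produces remap = 0x31, so compacted index 0
 * selects nibble 0 -> view 1 and compacted index 1 selects nibble 1 ->
 * view 3.
 */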
|
||||
|
||||
nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));
|
||||
|
||||
/* One of these days, when we have int64 everywhere, this will be
|
||||
* easier.
|
||||
*/
|
||||
nir_ssa_def *shifted;
|
||||
if (remap <= UINT32_MAX) {
|
||||
shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
|
||||
} else {
|
||||
nir_ssa_def *shifted_low =
|
||||
nir_ushr(b, nir_imm_int(b, remap), shift);
|
||||
nir_ssa_def *shifted_high =
|
||||
nir_ushr(b, nir_imm_int(b, remap >> 32),
|
||||
nir_isub(b, shift, nir_imm_int(b, 32)));
|
||||
shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
|
||||
shifted_low, shifted_high);
|
||||
}
|
||||
state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
|
||||
}
|
||||
} else {
|
||||
const struct glsl_type *type = glsl_int_type();
|
||||
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
|
||||
b->shader->info.stage == MESA_SHADER_GEOMETRY)
|
||||
type = glsl_array_type(type, 1, 0);
|
||||
|
||||
nir_variable *idx_var =
|
||||
nir_variable_create(b->shader, nir_var_shader_in,
|
||||
type, "view index");
|
||||
idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
|
||||
if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
|
||||
idx_var->data.interpolation = INTERP_MODE_FLAT;
|
||||
|
||||
nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
|
||||
if (glsl_type_is_array(type))
|
||||
deref = nir_build_deref_array_imm(b, deref, 0);
|
||||
|
||||
state->view_index = nir_load_deref(b, deref);
|
||||
}
|
||||
}
|
||||
|
||||
return state->view_index;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_load_view_index(const nir_instr *instr, const void *data)
|
||||
{
|
||||
return instr->type == nir_instr_type_intrinsic &&
|
||||
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_view_index;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
replace_load_view_index_with_zero(struct nir_builder *b,
|
||||
nir_instr *instr, void *data)
|
||||
{
|
||||
assert(is_load_view_index(instr, data));
|
||||
return nir_imm_zero(b, 1, 32);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
replace_load_view_index_with_layer_id(struct nir_builder *b,
|
||||
nir_instr *instr, void *data)
|
||||
{
|
||||
assert(is_load_view_index(instr, data));
|
||||
return nir_load_layer_id(b);
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
|
||||
bool use_primitive_replication)
|
||||
{
|
||||
assert(shader->info.stage != MESA_SHADER_COMPUTE);
|
||||
|
||||
/* If multiview isn't enabled, just lower the ViewIndex builtin to zero. */
|
||||
if (view_mask == 0) {
|
||||
return nir_shader_lower_instructions(shader, is_load_view_index,
|
||||
replace_load_view_index_with_zero, NULL);
|
||||
}
|
||||
|
||||
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
return nir_shader_lower_instructions(shader, is_load_view_index,
|
||||
replace_load_view_index_with_layer_id, NULL);
|
||||
}
|
||||
|
||||
/* This pass assumes a single entrypoint */
|
||||
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
|
||||
|
||||
/* Primitive Replication allows a shader to write different positions for
|
||||
* each view in the same execution. If only the position depends on the
|
||||
* view, then it is possible to use the feature instead of instancing to
|
||||
* implement multiview.
|
||||
*/
|
||||
if (use_primitive_replication) {
|
||||
bool progress = nir_lower_multiview(shader, view_mask);
|
||||
|
||||
if (progress) {
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, entrypoint);
|
||||
b.cursor = nir_before_cf_list(&entrypoint->body);
|
||||
|
||||
/* Fill Layer ID with zero. Replication will use that as base to
|
||||
* apply the RTAI offsets.
|
||||
*/
|
||||
nir_variable *layer_id_out =
|
||||
nir_variable_create(shader, nir_var_shader_out,
|
||||
glsl_int_type(), "layer ID");
|
||||
layer_id_out->data.location = VARYING_SLOT_LAYER;
|
||||
nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
struct lower_multiview_state state = {
|
||||
.view_mask = view_mask,
|
||||
};
|
||||
|
||||
nir_builder_init(&state.builder, entrypoint);
|
||||
|
||||
nir_foreach_block(block, entrypoint) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (load->intrinsic != nir_intrinsic_load_instance_id &&
|
||||
load->intrinsic != nir_intrinsic_load_view_index)
|
||||
continue;
|
||||
|
||||
assert(load->dest.is_ssa);
|
||||
|
||||
nir_ssa_def *value;
|
||||
if (load->intrinsic == nir_intrinsic_load_instance_id) {
|
||||
value = build_instance_id(&state);
|
||||
} else {
|
||||
assert(load->intrinsic == nir_intrinsic_load_view_index);
|
||||
value = build_view_index(&state);
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&load->dest.ssa, value);
|
||||
|
||||
nir_instr_remove(&load->instr);
|
||||
}
|
||||
}
|
||||
|
||||
/* The view index is available in all stages but the instance id is only
|
||||
* available in the VS. If it's not a fragment shader, we need to pass
|
||||
* the view index on to the next stage.
|
||||
*/
|
||||
nir_ssa_def *view_index = build_view_index(&state);
|
||||
|
||||
nir_builder *b = &state.builder;
|
||||
|
||||
assert(view_index->parent_instr->block == nir_start_block(entrypoint));
|
||||
b->cursor = nir_after_instr(view_index->parent_instr);
|
||||
|
||||
/* Unless there is only one possible view index (that would be set
|
||||
* directly), pass it to the next stage. */
|
||||
if (util_bitcount(state.view_mask) != 1) {
|
||||
nir_variable *view_index_out =
|
||||
nir_variable_create(shader, nir_var_shader_out,
|
||||
glsl_int_type(), "view index");
|
||||
view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
|
||||
nir_store_var(b, view_index_out, view_index, 0x1);
|
||||
}
|
||||
|
||||
nir_variable *layer_id_out =
|
||||
nir_variable_create(shader, nir_var_shader_out,
|
||||
glsl_int_type(), "layer ID");
|
||||
layer_id_out->data.location = VARYING_SLOT_LAYER;
|
||||
nir_store_var(b, layer_id_out, view_index, 0x1);
|
||||
|
||||
nir_metadata_preserve(entrypoint, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
anv_check_for_primitive_replication(struct anv_device *device,
|
||||
VkShaderStageFlags stages,
|
||||
nir_shader **shaders,
|
||||
uint32_t view_mask)
|
||||
{
|
||||
assert(device->info->ver >= 12);
|
||||
|
||||
static int primitive_replication_max_views = -1;
|
||||
if (primitive_replication_max_views < 0) {
|
||||
/* TODO: Figure out why we are not getting same benefits for larger than
|
||||
* 2 views. For now use Primitive Replication just for the 2-view case
|
||||
* by default.
|
||||
*/
|
||||
const unsigned default_max_views = 2;
|
||||
|
||||
primitive_replication_max_views =
|
||||
MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
|
||||
env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
|
||||
default_max_views));
|
||||
}
|
||||
|
||||
/* TODO: We should be able to support replication at 'geometry' stages
|
||||
* later than Vertex. In that case only the last stage can refer to
|
||||
* gl_ViewIndex.
|
||||
*/
|
||||
if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
|
||||
return false;
|
||||
|
||||
int view_count = util_bitcount(view_mask);
|
||||
if (view_count == 1 || view_count > primitive_replication_max_views)
|
||||
return false;
|
||||
|
||||
return nir_can_lower_multiview(shaders[MESA_SHADER_VERTEX]);
|
||||
}
|
124
src/intel/vulkan_hasvk/anv_nir_lower_ubo_loads.c
Normal file
@ -0,0 +1,124 @@
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir_builder.h"

static bool
lower_ubo_load_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
   if (load->intrinsic != nir_intrinsic_load_global_constant_offset &&
       load->intrinsic != nir_intrinsic_load_global_constant_bounded)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_ssa_def *base_addr = load->src[0].ssa;
   nir_ssa_def *bound = NULL;
   if (load->intrinsic == nir_intrinsic_load_global_constant_bounded)
      bound = load->src[2].ssa;

   unsigned bit_size = load->dest.ssa.bit_size;
   assert(bit_size >= 8 && bit_size % 8 == 0);
   unsigned byte_size = bit_size / 8;

   nir_ssa_def *val;
   if (nir_src_is_const(load->src[1])) {
      uint32_t offset = nir_src_as_uint(load->src[1]);

      /* Things should be component-aligned. */
      assert(offset % byte_size == 0);

      assert(ANV_UBO_ALIGNMENT == 64);

      unsigned suboffset = offset % 64;
      uint64_t aligned_offset = offset - suboffset;

      /* Load two just in case we go over a 64B boundary */
      nir_ssa_def *data[2];
      for (unsigned i = 0; i < 2; i++) {
         nir_ssa_def *pred;
         if (bound) {
            pred = nir_ilt(b, nir_imm_int(b, aligned_offset + i * 64 + 63),
                           bound);
         } else {
            pred = nir_imm_true(b);
         }

         nir_ssa_def *addr = nir_iadd_imm(b, base_addr,
                                          aligned_offset + i * 64);

         data[i] = nir_load_global_const_block_intel(b, 16, addr, pred);
      }

      val = nir_extract_bits(b, data, 2, suboffset * 8,
                             load->num_components, bit_size);
   } else {
      nir_ssa_def *offset = load->src[1].ssa;
      nir_ssa_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset));

      if (bound) {
         nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);

         unsigned load_size = byte_size * load->num_components;
         nir_ssa_def *in_bounds =
            nir_ilt(b, nir_iadd_imm(b, offset, load_size - 1), bound);

         nir_push_if(b, in_bounds);

         nir_ssa_def *load_val =
            nir_build_load_global_constant(b, load->dest.ssa.num_components,
                                           load->dest.ssa.bit_size, addr,
                                           .access = nir_intrinsic_access(load),
                                           .align_mul = nir_intrinsic_align_mul(load),
                                           .align_offset = nir_intrinsic_align_offset(load));

         nir_pop_if(b, NULL);

         val = nir_if_phi(b, load_val, zero);
      } else {
         val = nir_build_load_global_constant(b, load->dest.ssa.num_components,
                                              load->dest.ssa.bit_size, addr,
                                              .access = nir_intrinsic_access(load),
                                              .align_mul = nir_intrinsic_align_mul(load),
                                              .align_offset = nir_intrinsic_align_offset(load));
      }
   }

   nir_ssa_def_rewrite_uses(&load->dest.ssa, val);
   nir_instr_remove(&load->instr);

   return true;
}

bool
anv_nir_lower_ubo_loads(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_ubo_load_instr,
                                       nir_metadata_none,
                                       NULL);
}
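For the constant-offset path in lower_ubo_load_instr above, the arithmetic works out as follows (illustrative numbers, not taken from the source): a 4-byte load at byte offset 100 has suboffset = 100 % 64 = 36 and aligned_offset = 64, so the pass emits two 64-byte block loads at base_addr + 64 and base_addr + 128 and then extracts 32 bits starting at bit 36 * 8 = 288 of the concatenated 128-byte window; when a bound is present, each block load is additionally predicated on its last byte lying below that bound.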
349
src/intel/vulkan_hasvk/anv_nir_lower_ycbcr_textures.c
Normal file
@ -0,0 +1,349 @@
|
||||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "anv_private.h"
|
||||
#include "nir/nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
#include "nir/nir_vulkan.h"
|
||||
|
||||
struct ycbcr_state {
|
||||
nir_builder *builder;
|
||||
nir_ssa_def *image_size;
|
||||
nir_tex_instr *origin_tex;
|
||||
nir_deref_instr *tex_deref;
|
||||
struct anv_ycbcr_conversion *conversion;
|
||||
};
|
||||
|
||||
/* TODO: we should probably replace this with a push constant/uniform. */
|
||||
static nir_ssa_def *
|
||||
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
|
||||
{
|
||||
if (state->image_size)
|
||||
return state->image_size;
|
||||
|
||||
nir_builder *b = state->builder;
|
||||
const struct glsl_type *type = texture->type;
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
|
||||
|
||||
tex->op = nir_texop_txs;
|
||||
tex->sampler_dim = glsl_get_sampler_dim(type);
|
||||
tex->is_array = glsl_sampler_type_is_array(type);
|
||||
tex->is_shadow = glsl_sampler_type_is_shadow(type);
|
||||
tex->dest_type = nir_type_int32;
|
||||
|
||||
tex->src[0].src_type = nir_tex_src_texture_deref;
|
||||
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest,
|
||||
nir_tex_instr_dest_size(tex), 32, NULL);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
|
||||
state->image_size = nir_i2f32(b, &tex->dest.ssa);
|
||||
|
||||
return state->image_size;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
implicit_downsampled_coord(nir_builder *b,
|
||||
nir_ssa_def *value,
|
||||
nir_ssa_def *max_value,
|
||||
int div_scale)
|
||||
{
|
||||
return nir_fadd(b,
|
||||
value,
|
||||
nir_fdiv(b,
|
||||
nir_imm_float(b, 1.0f),
|
||||
nir_fmul(b,
|
||||
nir_imm_float(b, div_scale),
|
||||
max_value)));
|
||||
}
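/* Illustrative example: for a horizontally subsampled chroma plane
 * (div_scale == 2) sampled against a luma width of w texels, the helper
 * above returns u + 1.0 / (2.0 * w), i.e. a half-luma-texel shift of the
 * implicit sample point; the caller only applies it for
 * VK_CHROMA_LOCATION_COSITED_EVEN planes.
 */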
|
||||
|
||||
static nir_ssa_def *
|
||||
implicit_downsampled_coords(struct ycbcr_state *state,
|
||||
nir_ssa_def *old_coords,
|
||||
const struct anv_format_plane *plane_format)
|
||||
{
|
||||
nir_builder *b = state->builder;
|
||||
struct anv_ycbcr_conversion *conversion = state->conversion;
|
||||
nir_ssa_def *image_size = get_texture_size(state, state->tex_deref);
|
||||
nir_ssa_def *comp[4] = { NULL, };
|
||||
int c;
|
||||
|
||||
for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) {
|
||||
if (plane_format->denominator_scales[c] > 1 &&
|
||||
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
|
||||
comp[c] = implicit_downsampled_coord(b,
|
||||
nir_channel(b, old_coords, c),
|
||||
nir_channel(b, image_size, c),
|
||||
plane_format->denominator_scales[c]);
|
||||
} else {
|
||||
comp[c] = nir_channel(b, old_coords, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* Leave other coordinates untouched */
|
||||
for (; c < old_coords->num_components; c++)
|
||||
comp[c] = nir_channel(b, old_coords, c);
|
||||
|
||||
return nir_vec(b, comp, old_coords->num_components);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
create_plane_tex_instr_implicit(struct ycbcr_state *state,
|
||||
uint32_t plane)
|
||||
{
|
||||
nir_builder *b = state->builder;
|
||||
struct anv_ycbcr_conversion *conversion = state->conversion;
|
||||
const struct anv_format_plane *plane_format =
|
||||
&conversion->format->planes[plane];
|
||||
nir_tex_instr *old_tex = state->origin_tex;
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
|
||||
|
||||
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
|
||||
tex->src[i].src_type = old_tex->src[i].src_type;
|
||||
|
||||
switch (old_tex->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
if (plane_format->has_chroma && conversion->chroma_reconstruction) {
|
||||
assert(old_tex->src[i].src.is_ssa);
|
||||
tex->src[i].src =
|
||||
nir_src_for_ssa(implicit_downsampled_coords(state,
|
||||
old_tex->src[i].src.ssa,
|
||||
plane_format));
|
||||
break;
|
||||
}
|
||||
FALLTHROUGH;
|
||||
default:
|
||||
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, &tex->instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
|
||||
tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
|
||||
|
||||
tex->sampler_dim = old_tex->sampler_dim;
|
||||
tex->dest_type = old_tex->dest_type;
|
||||
|
||||
tex->op = old_tex->op;
|
||||
tex->coord_components = old_tex->coord_components;
|
||||
tex->is_new_style_shadow = old_tex->is_new_style_shadow;
|
||||
tex->component = old_tex->component;
|
||||
|
||||
tex->texture_index = old_tex->texture_index;
|
||||
tex->sampler_index = old_tex->sampler_index;
|
||||
tex->is_array = old_tex->is_array;
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest,
|
||||
old_tex->dest.ssa.num_components,
|
||||
nir_dest_bit_size(old_tex->dest), NULL);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
|
||||
return &tex->dest.ssa;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
channel_to_component(enum isl_channel_select channel)
|
||||
{
|
||||
switch (channel) {
|
||||
case ISL_CHANNEL_SELECT_RED:
|
||||
return 0;
|
||||
case ISL_CHANNEL_SELECT_GREEN:
|
||||
return 1;
|
||||
case ISL_CHANNEL_SELECT_BLUE:
|
||||
return 2;
|
||||
case ISL_CHANNEL_SELECT_ALPHA:
|
||||
return 3;
|
||||
default:
|
||||
unreachable("invalid channel");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static enum isl_channel_select
|
||||
swizzle_channel(struct isl_swizzle swizzle, unsigned channel)
|
||||
{
|
||||
switch (channel) {
|
||||
case 0:
|
||||
return swizzle.r;
|
||||
case 1:
|
||||
return swizzle.g;
|
||||
case 2:
|
||||
return swizzle.b;
|
||||
case 3:
|
||||
return swizzle.a;
|
||||
default:
|
||||
unreachable("invalid channel");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
anv_nir_lower_ycbcr_textures_instr(nir_builder *builder,
|
||||
nir_instr *instr,
|
||||
void *cb_data)
|
||||
{
|
||||
const struct anv_pipeline_layout *layout = cb_data;
|
||||
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
return false;
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
||||
assert(deref_src_idx >= 0);
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
|
||||
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
const struct anv_descriptor_set_layout *set_layout =
|
||||
layout->set[var->data.descriptor_set].layout;
|
||||
const struct anv_descriptor_set_binding_layout *binding =
|
||||
&set_layout->binding[var->data.binding];
|
||||
|
||||
/* For the following instructions, we don't apply any change and let the
|
||||
* instruction apply to the first plane.
|
||||
*/
|
||||
if (tex->op == nir_texop_txs ||
|
||||
tex->op == nir_texop_query_levels ||
|
||||
tex->op == nir_texop_lod)
|
||||
return false;
|
||||
|
||||
if (binding->immutable_samplers == NULL)
|
||||
return false;
|
||||
|
||||
assert(tex->texture_index == 0);
|
||||
unsigned array_index = 0;
|
||||
if (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
if (!nir_src_is_const(deref->arr.index))
|
||||
return false;
|
||||
array_index = nir_src_as_uint(deref->arr.index);
|
||||
array_index = MIN2(array_index, binding->array_size - 1);
|
||||
}
|
||||
const struct anv_sampler *sampler = binding->immutable_samplers[array_index];
|
||||
|
||||
if (sampler->conversion == NULL)
|
||||
return false;
|
||||
|
||||
struct ycbcr_state state = {
|
||||
.builder = builder,
|
||||
.origin_tex = tex,
|
||||
.tex_deref = deref,
|
||||
.conversion = sampler->conversion,
|
||||
};
|
||||
|
||||
builder->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
const struct anv_format *format = state.conversion->format;
|
||||
const struct isl_format_layout *y_isl_layout = NULL;
|
||||
for (uint32_t p = 0; p < format->n_planes; p++) {
|
||||
if (!format->planes[p].has_chroma)
|
||||
y_isl_layout = isl_format_get_layout(format->planes[p].isl_format);
|
||||
}
|
||||
assert(y_isl_layout != NULL);
|
||||
uint8_t y_bpc = y_isl_layout->channels_array[0].bits;
|
||||
|
||||
/* |ycbcr_comp| holds components in the order : Cr-Y-Cb */
|
||||
nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
|
||||
nir_ssa_def *one = nir_imm_float(builder, 1.0f);
|
||||
/* Use extra 2 channels for following swizzle */
|
||||
nir_ssa_def *ycbcr_comp[5] = { zero, zero, zero, one, zero };
|
||||
|
||||
uint8_t ycbcr_bpcs[5];
|
||||
memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs));
|
||||
|
||||
/* Go through all the planes and gather the samples into a |ycbcr_comp|
|
||||
* while applying a swizzle required by the spec:
|
||||
*
|
||||
* R, G, B should respectively map to Cr, Y, Cb
|
||||
*/
|
||||
for (uint32_t p = 0; p < format->n_planes; p++) {
|
||||
const struct anv_format_plane *plane_format = &format->planes[p];
|
||||
nir_ssa_def *plane_sample = create_plane_tex_instr_implicit(&state, p);
|
||||
|
||||
for (uint32_t pc = 0; pc < 4; pc++) {
|
||||
enum isl_channel_select ycbcr_swizzle =
|
||||
swizzle_channel(plane_format->ycbcr_swizzle, pc);
|
||||
if (ycbcr_swizzle == ISL_CHANNEL_SELECT_ZERO)
|
||||
continue;
|
||||
|
||||
unsigned ycbcr_component = channel_to_component(ycbcr_swizzle);
|
||||
ycbcr_comp[ycbcr_component] = nir_channel(builder, plane_sample, pc);
|
||||
|
||||
/* Also compute the number of bits for each component. */
|
||||
const struct isl_format_layout *isl_layout =
|
||||
isl_format_get_layout(plane_format->isl_format);
|
||||
ycbcr_bpcs[ycbcr_component] = isl_layout->channels_array[pc].bits;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now remaps components to the order specified by the conversion. */
|
||||
nir_ssa_def *swizzled_comp[4] = { NULL, };
|
||||
uint32_t swizzled_bpcs[4] = { 0, };
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state.conversion->mapping); i++) {
|
||||
/* Maps to components in |ycbcr_comp| */
|
||||
static const uint32_t swizzle_mapping[] = {
|
||||
[VK_COMPONENT_SWIZZLE_ZERO] = 4,
|
||||
[VK_COMPONENT_SWIZZLE_ONE] = 3,
|
||||
[VK_COMPONENT_SWIZZLE_R] = 0,
|
||||
[VK_COMPONENT_SWIZZLE_G] = 1,
|
||||
[VK_COMPONENT_SWIZZLE_B] = 2,
|
||||
[VK_COMPONENT_SWIZZLE_A] = 3,
|
||||
};
|
||||
const VkComponentSwizzle m = state.conversion->mapping[i];
|
||||
|
||||
if (m == VK_COMPONENT_SWIZZLE_IDENTITY) {
|
||||
swizzled_comp[i] = ycbcr_comp[i];
|
||||
swizzled_bpcs[i] = ycbcr_bpcs[i];
|
||||
} else {
|
||||
swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]];
|
||||
swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]];
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def *result = nir_vec(builder, swizzled_comp, 4);
|
||||
if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
|
||||
result = nir_convert_ycbcr_to_rgb(builder,
|
||||
state.conversion->ycbcr_model,
|
||||
state.conversion->ycbcr_range,
|
||||
result,
|
||||
swizzled_bpcs);
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
|
||||
nir_instr_remove(&tex->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_lower_ycbcr_textures(nir_shader *shader,
|
||||
const struct anv_pipeline_layout *layout)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader,
|
||||
anv_nir_lower_ycbcr_textures_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
(void *)layout);
|
||||
}
|
488
src/intel/vulkan_hasvk/anv_perf.c
Normal file
@ -0,0 +1,488 @@
|
||||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
#include "perf/intel_perf.h"
|
||||
#include "perf/intel_perf_mdapi.h"
|
||||
|
||||
#include "util/mesa-sha1.h"
|
||||
|
||||
void
|
||||
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &device->info;
|
||||
|
||||
device->perf = NULL;
|
||||
|
||||
/* We need self modifying batches. The i915 parser prevents it on
|
||||
* Gfx7.5 :( maybe one day.
|
||||
*/
|
||||
if (devinfo->ver < 8)
|
||||
return;
|
||||
|
||||
struct intel_perf_config *perf = intel_perf_new(NULL);
|
||||
|
||||
intel_perf_init_metrics(perf, &device->info, fd,
|
||||
false /* pipeline statistics */,
|
||||
true /* register snapshots */);
|
||||
|
||||
if (!perf->n_queries)
|
||||
goto err;
|
||||
|
||||
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
|
||||
* perf revision 2.
|
||||
*/
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
|
||||
if (!intel_perf_has_hold_preemption(perf))
|
||||
goto err;
|
||||
}
|
||||
|
||||
device->perf = perf;
|
||||
|
||||
/* Compute the number of commands we need to implement a performance
|
||||
* query.
|
||||
*/
|
||||
const struct intel_perf_query_field_layout *layout = &perf->query_layout;
|
||||
device->n_perf_query_commands = 0;
|
||||
for (uint32_t f = 0; f < layout->n_fields; f++) {
|
||||
struct intel_perf_query_field *field = &layout->fields[f];
|
||||
|
||||
switch (field->type) {
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
|
||||
device->n_perf_query_commands++;
|
||||
break;
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
|
||||
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
|
||||
device->n_perf_query_commands += field->size / 4;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled register type");
|
||||
}
|
||||
}
|
||||
device->n_perf_query_commands *= 2; /* Begin & End */
|
||||
device->n_perf_query_commands += 1; /* availability */
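/* Example (illustrative): a query layout with one MI_RPC field and one
 * 8-byte SRM field needs 1 + 8 / 4 = 3 commands per snapshot, giving
 * 3 * 2 + 1 = 7 commands once begin, end and the availability write are
 * counted.
 */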
|
||||
|
||||
return;
|
||||
|
||||
err:
|
||||
ralloc_free(perf);
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_perf_init(struct anv_device *device)
|
||||
{
|
||||
device->perf_fd = -1;
|
||||
}
|
||||
|
||||
static int
|
||||
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
||||
{
|
||||
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
|
||||
struct drm_i915_perf_open_param param;
|
||||
int p = 0, stream_fd;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
|
||||
properties[p++] = true;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
|
||||
properties[p++] = metric_id;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
|
||||
properties[p++] = device->info->ver >= 8 ?
|
||||
I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
|
||||
I915_OA_FORMAT_A45_B8_C8;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
|
||||
properties[p++] = 31; /* slowest sampling period */
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
|
||||
properties[p++] = device->context_id;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
|
||||
properties[p++] = true;
|
||||
|
||||
/* If global SSEU is available, pin it to the default. This will ensure on
|
||||
* Gfx11 for instance we use the full EU array. Initially when perf was
|
||||
* enabled we would use only half on Gfx11 because of functional
|
||||
* requirements.
|
||||
*
|
||||
* Temporarily disable this option on Gfx12.5+; the kernel doesn't appear to
|
||||
* support it.
|
||||
*/
|
||||
if (intel_perf_has_global_sseu(device->physical->perf) &&
|
||||
device->info->verx10 < 125) {
|
||||
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
|
||||
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
|
||||
}
|
||||
|
||||
memset(¶m, 0, sizeof(param));
|
||||
param.flags = 0;
|
||||
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
|
||||
param.properties_ptr = (uintptr_t)properties;
|
||||
param.num_properties = p / 2;
|
||||
|
||||
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
|
||||
return stream_fd;
|
||||
}
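/* The properties array built above is a flat key/value list, e.g. for the
 * first three pairs:
 *   properties[0..1] = { DRM_I915_PERF_PROP_SAMPLE_OA,      true      }
 *   properties[2..3] = { DRM_I915_PERF_PROP_OA_METRICS_SET, metric_id }
 *   properties[4..5] = { DRM_I915_PERF_PROP_OA_FORMAT,      ...       }
 * which is why param.num_properties is set to p / 2.
 */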
|
||||
|
||||
/* VK_INTEL_performance_query */
|
||||
VkResult anv_InitializePerformanceApiINTEL(
|
||||
VkDevice _device,
|
||||
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
if (!device->physical->perf)
|
||||
return VK_ERROR_EXTENSION_NOT_PRESENT;
|
||||
|
||||
/* Not much to do here */
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult anv_GetPerformanceParameterINTEL(
|
||||
VkDevice _device,
|
||||
VkPerformanceParameterTypeINTEL parameter,
|
||||
VkPerformanceValueINTEL* pValue)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
if (!device->physical->perf)
|
||||
return VK_ERROR_EXTENSION_NOT_PRESENT;
|
||||
|
||||
VkResult result = VK_SUCCESS;
|
||||
switch (parameter) {
|
||||
case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
|
||||
pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
|
||||
pValue->data.valueBool = VK_TRUE;
|
||||
break;
|
||||
|
||||
case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
|
||||
pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
|
||||
pValue->data.value32 = 25;
|
||||
break;
|
||||
|
||||
default:
|
||||
result = VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult anv_CmdSetPerformanceMarkerINTEL(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult anv_AcquirePerformanceConfigurationINTEL(
|
||||
VkDevice _device,
|
||||
const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
|
||||
VkPerformanceConfigurationINTEL* pConfiguration)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
struct anv_performance_configuration_intel *config;
|
||||
|
||||
config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
|
||||
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
|
||||
if (!config)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
|
||||
config->register_config =
|
||||
intel_perf_load_configuration(device->physical->perf, device->fd,
|
||||
INTEL_PERF_QUERY_GUID_MDAPI);
|
||||
if (!config->register_config) {
|
||||
vk_object_free(&device->vk, NULL, config);
|
||||
return VK_INCOMPLETE;
|
||||
}
|
||||
|
||||
int ret =
|
||||
intel_perf_store_configuration(device->physical->perf, device->fd,
|
||||
config->register_config, NULL /* guid */);
|
||||
if (ret < 0) {
|
||||
ralloc_free(config->register_config);
|
||||
vk_object_free(&device->vk, NULL, config);
|
||||
return VK_INCOMPLETE;
|
||||
}
|
||||
|
||||
config->config_id = ret;
|
||||
}
|
||||
|
||||
*pConfiguration = anv_performance_configuration_intel_to_handle(config);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult anv_ReleasePerformanceConfigurationINTEL(
|
||||
VkDevice _device,
|
||||
VkPerformanceConfigurationINTEL _configuration)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
|
||||
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
|
||||
intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
|
||||
|
||||
ralloc_free(config->register_config);
|
||||
|
||||
vk_object_free(&device->vk, NULL, config);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult anv_QueueSetPerformanceConfigurationINTEL(
|
||||
VkQueue _queue,
|
||||
VkPerformanceConfigurationINTEL _configuration)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_queue, queue, _queue);
|
||||
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
|
||||
struct anv_device *device = queue->device;
|
||||
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
|
||||
if (device->perf_fd < 0) {
|
||||
device->perf_fd = anv_device_perf_open(device, config->config_id);
|
||||
if (device->perf_fd < 0)
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
} else {
|
||||
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
|
||||
(void *)(uintptr_t) config->config_id);
|
||||
if (ret < 0)
|
||||
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void anv_UninitializePerformanceApiINTEL(
|
||||
VkDevice _device)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
if (device->perf_fd >= 0) {
|
||||
close(device->perf_fd);
|
||||
device->perf_fd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* VK_KHR_performance_query */
|
||||
static const VkPerformanceCounterUnitKHR
|
||||
intel_perf_counter_unit_to_vk_unit[] = {
|
||||
[INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
|
||||
[INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
[INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
|
||||
};
|
||||
|
||||
static const VkPerformanceCounterStorageKHR
|
||||
intel_perf_counter_data_type_to_vk_storage[] = {
|
||||
[INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
|
||||
[INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
|
||||
[INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
|
||||
[INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
|
||||
[INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
|
||||
};
|
||||
|
||||
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
uint32_t queueFamilyIndex,
|
||||
uint32_t* pCounterCount,
|
||||
VkPerformanceCounterKHR* pCounters,
|
||||
VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
|
||||
struct intel_perf_config *perf = pdevice->perf;
|
||||
|
||||
uint32_t desc_count = *pCounterCount;
|
||||
|
||||
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
|
||||
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
|
||||
pCounterDescriptions, &desc_count);
|
||||
|
||||
/* We cannot support performance queries on anything other than RCS,
|
||||
* because the MI_REPORT_PERF_COUNT command is not available on other
|
||||
* engines.
|
||||
*/
|
||||
struct anv_queue_family *queue_family =
|
||||
&pdevice->queue.families[queueFamilyIndex];
|
||||
if (queue_family->engine_class != I915_ENGINE_CLASS_RENDER)
|
||||
return vk_outarray_status(&out);
|
||||
|
||||
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
|
||||
const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
|
||||
|
||||
vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
|
||||
counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
|
||||
counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
|
||||
counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
|
||||
|
||||
unsigned char sha1_result[20];
|
||||
_mesa_sha1_compute(intel_counter->symbol_name,
|
||||
strlen(intel_counter->symbol_name),
|
||||
sha1_result);
|
||||
memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
|
||||
}
|
||||
|
||||
vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
|
||||
desc->flags = 0; /* None so far. */
|
||||
snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
|
||||
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
|
||||
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
|
||||
}
|
||||
}
|
||||
|
||||
return vk_outarray_status(&out);
|
||||
}
|
||||
|
||||
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
|
||||
uint32_t* pNumPasses)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
|
||||
struct intel_perf_config *perf = pdevice->perf;
|
||||
|
||||
if (!perf) {
|
||||
*pNumPasses = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
*pNumPasses = intel_perf_get_n_passes(perf,
|
||||
pPerformanceQueryCreateInfo->pCounterIndices,
|
||||
pPerformanceQueryCreateInfo->counterIndexCount,
|
||||
NULL);
|
||||
}
|
||||
|
||||
VkResult anv_AcquireProfilingLockKHR(
|
||||
VkDevice _device,
|
||||
const VkAcquireProfilingLockInfoKHR* pInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
struct intel_perf_config *perf = device->physical->perf;
|
||||
struct intel_perf_query_info *first_metric_set = &perf->queries[0];
|
||||
int fd = -1;
|
||||
|
||||
assert(device->perf_fd == -1);
|
||||
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
|
||||
fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
|
||||
if (fd < 0)
|
||||
return VK_TIMEOUT;
|
||||
}
|
||||
|
||||
device->perf_fd = fd;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void anv_ReleaseProfilingLockKHR(
|
||||
VkDevice _device)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
|
||||
assert(device->perf_fd >= 0);
|
||||
close(device->perf_fd);
|
||||
}
|
||||
device->perf_fd = -1;
|
||||
}

void
anv_perf_write_pass_results(struct intel_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct intel_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->pass != pass)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case INTEL_PERF_QUERY_TYPE_OA:
      case INTEL_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results);
            break;
         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a unit */
      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}

3300
src/intel/vulkan_hasvk/anv_pipeline.c
Normal file
File diff suppressed because it is too large

380
src/intel/vulkan_hasvk/anv_pipeline_cache.c
Normal file
@ -0,0 +1,380 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/blob.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "nir/nir_serialize.h"
|
||||
#include "anv_private.h"
|
||||
#include "nir/nir_xfb_info.h"
|
||||
#include "vulkan/util/vk_util.h"
|
||||
|
||||
static bool
|
||||
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
|
||||
struct blob *blob);
|
||||
|
||||
struct vk_pipeline_cache_object *
|
||||
anv_shader_bin_deserialize(struct vk_device *device,
|
||||
const void *key_data, size_t key_size,
|
||||
struct blob_reader *blob);
|
||||
|
||||
static void
|
||||
anv_shader_bin_destroy(struct vk_pipeline_cache_object *object)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(object->device, struct anv_device, vk);
|
||||
struct anv_shader_bin *shader =
|
||||
container_of(object, struct anv_shader_bin, base);
|
||||
|
||||
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
|
||||
vk_pipeline_cache_object_finish(&shader->base);
|
||||
vk_free(&device->vk.alloc, shader);
|
||||
}
|
||||
|
||||
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
|
||||
.serialize = anv_shader_bin_serialize,
|
||||
.deserialize = anv_shader_bin_deserialize,
|
||||
.destroy = anv_shader_bin_destroy,
|
||||
};
|
||||
|
||||
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
|
||||
&anv_shader_bin_ops,
|
||||
NULL
|
||||
};
|
||||
|
||||
struct anv_shader_bin *
|
||||
anv_shader_bin_create(struct anv_device *device,
|
||||
gl_shader_stage stage,
|
||||
const void *key_data, uint32_t key_size,
|
||||
const void *kernel_data, uint32_t kernel_size,
|
||||
const struct brw_stage_prog_data *prog_data_in,
|
||||
uint32_t prog_data_size,
|
||||
const struct brw_compile_stats *stats, uint32_t num_stats,
|
||||
const nir_xfb_info *xfb_info_in,
|
||||
const struct anv_pipeline_bind_map *bind_map)
|
||||
{
|
||||
VK_MULTIALLOC(ma);
|
||||
VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
|
||||
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
|
||||
VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
|
||||
prog_data_size);
|
||||
VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
|
||||
prog_data_in->num_relocs);
|
||||
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
|
||||
|
||||
VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
|
||||
xfb_info_in == NULL ? 0 :
|
||||
nir_xfb_info_size(xfb_info_in->output_count));
|
||||
|
||||
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
|
||||
bind_map->surface_count);
|
||||
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
|
||||
bind_map->sampler_count);
|
||||
|
||||
if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
|
||||
return NULL;
|
||||
|
||||
memcpy(obj_key_data, key_data, key_size);
|
||||
vk_pipeline_cache_object_init(&device->vk, &shader->base,
|
||||
&anv_shader_bin_ops, obj_key_data, key_size);
|
||||
|
||||
shader->stage = stage;
|
||||
|
||||
shader->kernel =
|
||||
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
|
||||
memcpy(shader->kernel.map, kernel_data, kernel_size);
|
||||
shader->kernel_size = kernel_size;
|
||||
|
||||
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
|
||||
shader->kernel.offset +
|
||||
prog_data_in->const_data_offset;
|
||||
|
||||
int rv_count = 0;
|
||||
struct brw_shader_reloc_value reloc_values[5];
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
|
||||
.value = shader_data_addr,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
|
||||
.value = shader_data_addr >> 32,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
|
||||
.value = shader->kernel.offset,
|
||||
};
|
||||
if (brw_shader_stage_is_bindless(stage)) {
|
||||
const struct brw_bs_prog_data *bs_prog_data =
|
||||
brw_bs_prog_data_const(prog_data_in);
|
||||
uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
|
||||
shader->kernel.offset +
|
||||
bs_prog_data->resume_sbt_offset;
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
|
||||
.value = resume_sbt_addr,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
||||
.value = resume_sbt_addr >> 32,
|
||||
};
|
||||
}
|
||||
|
||||
brw_write_shader_relocs(&device->physical->compiler->isa,
|
||||
shader->kernel.map, prog_data_in,
|
||||
reloc_values, rv_count);
|
||||
|
||||
memcpy(prog_data, prog_data_in, prog_data_size);
|
||||
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
|
||||
prog_data_in->num_relocs);
|
||||
prog_data->relocs = prog_data_relocs;
|
||||
memset(prog_data_param, 0,
|
||||
prog_data->nr_params * sizeof(*prog_data_param));
|
||||
prog_data->param = prog_data_param;
|
||||
shader->prog_data = prog_data;
|
||||
shader->prog_data_size = prog_data_size;
|
||||
|
||||
assert(num_stats <= ARRAY_SIZE(shader->stats));
|
||||
typed_memcpy(shader->stats, stats, num_stats);
|
||||
shader->num_stats = num_stats;
|
||||
|
||||
if (xfb_info_in) {
|
||||
*xfb_info = *xfb_info_in;
|
||||
typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
|
||||
xfb_info_in->output_count);
|
||||
shader->xfb_info = xfb_info;
|
||||
} else {
|
||||
shader->xfb_info = NULL;
|
||||
}
|
||||
|
||||
shader->bind_map = *bind_map;
|
||||
typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
|
||||
bind_map->surface_count);
|
||||
shader->bind_map.surface_to_descriptor = surface_to_descriptor;
|
||||
typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
|
||||
bind_map->sampler_count);
|
||||
shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
static bool
|
||||
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
|
||||
struct blob *blob)
|
||||
{
|
||||
struct anv_shader_bin *shader =
|
||||
container_of(object, struct anv_shader_bin, base);
|
||||
|
||||
blob_write_uint32(blob, shader->stage);
|
||||
|
||||
blob_write_uint32(blob, shader->kernel_size);
|
||||
blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
|
||||
|
||||
blob_write_uint32(blob, shader->prog_data_size);
|
||||
blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
|
||||
blob_write_bytes(blob, shader->prog_data->relocs,
|
||||
shader->prog_data->num_relocs *
|
||||
sizeof(shader->prog_data->relocs[0]));
|
||||
|
||||
blob_write_uint32(blob, shader->num_stats);
|
||||
blob_write_bytes(blob, shader->stats,
|
||||
shader->num_stats * sizeof(shader->stats[0]));
|
||||
|
||||
if (shader->xfb_info) {
|
||||
uint32_t xfb_info_size =
|
||||
nir_xfb_info_size(shader->xfb_info->output_count);
|
||||
blob_write_uint32(blob, xfb_info_size);
|
||||
blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
|
||||
} else {
|
||||
blob_write_uint32(blob, 0);
|
||||
}
|
||||
|
||||
blob_write_bytes(blob, shader->bind_map.surface_sha1,
|
||||
sizeof(shader->bind_map.surface_sha1));
|
||||
blob_write_bytes(blob, shader->bind_map.sampler_sha1,
|
||||
sizeof(shader->bind_map.sampler_sha1));
|
||||
blob_write_bytes(blob, shader->bind_map.push_sha1,
|
||||
sizeof(shader->bind_map.push_sha1));
|
||||
blob_write_uint32(blob, shader->bind_map.surface_count);
|
||||
blob_write_uint32(blob, shader->bind_map.sampler_count);
|
||||
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
|
||||
shader->bind_map.surface_count *
|
||||
sizeof(*shader->bind_map.surface_to_descriptor));
|
||||
blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
|
||||
shader->bind_map.sampler_count *
|
||||
sizeof(*shader->bind_map.sampler_to_descriptor));
|
||||
blob_write_bytes(blob, shader->bind_map.push_ranges,
|
||||
sizeof(shader->bind_map.push_ranges));
|
||||
|
||||
return !blob->out_of_memory;
|
||||
}
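
/* Editorial note: for reference, the blob produced above (and parsed back by
 * anv_shader_bin_deserialize() below) is laid out as:
 *
 *   u32 stage
 *   u32 kernel_size,    kernel_size bytes of ISA
 *   u32 prog_data_size, prog_data bytes, then num_relocs relocation entries
 *   u32 num_stats,      num_stats brw_compile_stats entries
 *   u32 xfb_info_size,  xfb_info bytes (size is 0 when there is no XFB info)
 *   surface/sampler/push SHA1s from the bind map
 *   u32 surface_count, u32 sampler_count, both descriptor tables
 *   the push_ranges array
 *
 * Any change on one side has to be mirrored on the other.
 */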
|
||||
|
||||
struct vk_pipeline_cache_object *
|
||||
anv_shader_bin_deserialize(struct vk_device *vk_device,
|
||||
const void *key_data, size_t key_size,
|
||||
struct blob_reader *blob)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(vk_device, struct anv_device, vk);
|
||||
|
||||
gl_shader_stage stage = blob_read_uint32(blob);
|
||||
|
||||
uint32_t kernel_size = blob_read_uint32(blob);
|
||||
const void *kernel_data = blob_read_bytes(blob, kernel_size);
|
||||
|
||||
uint32_t prog_data_size = blob_read_uint32(blob);
|
||||
const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
|
||||
if (blob->overrun)
|
||||
return NULL;
|
||||
|
||||
union brw_any_prog_data prog_data;
|
||||
memcpy(&prog_data, prog_data_bytes,
|
||||
MIN2(sizeof(prog_data), prog_data_size));
|
||||
prog_data.base.relocs =
|
||||
blob_read_bytes(blob, prog_data.base.num_relocs *
|
||||
sizeof(prog_data.base.relocs[0]));
|
||||
|
||||
uint32_t num_stats = blob_read_uint32(blob);
|
||||
const struct brw_compile_stats *stats =
|
||||
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
|
||||
|
||||
const nir_xfb_info *xfb_info = NULL;
|
||||
uint32_t xfb_size = blob_read_uint32(blob);
|
||||
if (xfb_size)
|
||||
xfb_info = blob_read_bytes(blob, xfb_size);
|
||||
|
||||
struct anv_pipeline_bind_map bind_map;
|
||||
blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
|
||||
blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
|
||||
blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
|
||||
bind_map.surface_count = blob_read_uint32(blob);
|
||||
bind_map.sampler_count = blob_read_uint32(blob);
|
||||
bind_map.surface_to_descriptor = (void *)
|
||||
blob_read_bytes(blob, bind_map.surface_count *
|
||||
sizeof(*bind_map.surface_to_descriptor));
|
||||
bind_map.sampler_to_descriptor = (void *)
|
||||
blob_read_bytes(blob, bind_map.sampler_count *
|
||||
sizeof(*bind_map.sampler_to_descriptor));
|
||||
blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));
|
||||
|
||||
if (blob->overrun)
|
||||
return NULL;
|
||||
|
||||
struct anv_shader_bin *shader =
|
||||
anv_shader_bin_create(device, stage,
|
||||
key_data, key_size,
|
||||
kernel_data, kernel_size,
|
||||
&prog_data.base, prog_data_size,
|
||||
stats, num_stats, xfb_info, &bind_map);
|
||||
if (shader == NULL)
|
||||
return NULL;
|
||||
|
||||
return &shader->base;
|
||||
}
|
||||
|
||||
struct anv_shader_bin *
|
||||
anv_device_search_for_kernel(struct anv_device *device,
|
||||
struct vk_pipeline_cache *cache,
|
||||
const void *key_data, uint32_t key_size,
|
||||
bool *user_cache_hit)
|
||||
{
|
||||
/* Use the default pipeline cache if none is specified */
|
||||
if (cache == NULL)
|
||||
cache = device->default_pipeline_cache;
|
||||
|
||||
bool cache_hit = false;
|
||||
struct vk_pipeline_cache_object *object =
|
||||
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
|
||||
&anv_shader_bin_ops, &cache_hit);
|
||||
if (user_cache_hit != NULL) {
|
||||
*user_cache_hit = object != NULL && cache_hit &&
|
||||
cache != device->default_pipeline_cache;
|
||||
}
|
||||
if (object == NULL)
|
||||
return NULL;
|
||||
|
||||
return container_of(object, struct anv_shader_bin, base);
|
||||
}
|
||||
|
||||
struct anv_shader_bin *
|
||||
anv_device_upload_kernel(struct anv_device *device,
|
||||
struct vk_pipeline_cache *cache,
|
||||
gl_shader_stage stage,
|
||||
const void *key_data, uint32_t key_size,
|
||||
const void *kernel_data, uint32_t kernel_size,
|
||||
const struct brw_stage_prog_data *prog_data,
|
||||
uint32_t prog_data_size,
|
||||
const struct brw_compile_stats *stats,
|
||||
uint32_t num_stats,
|
||||
const nir_xfb_info *xfb_info,
|
||||
const struct anv_pipeline_bind_map *bind_map)
|
||||
{
|
||||
/* Use the default pipeline cache if none is specified */
|
||||
if (cache == NULL)
|
||||
cache = device->default_pipeline_cache;
|
||||
|
||||
struct anv_shader_bin *shader =
|
||||
anv_shader_bin_create(device, stage,
|
||||
key_data, key_size,
|
||||
kernel_data, kernel_size,
|
||||
prog_data, prog_data_size,
|
||||
stats, num_stats,
|
||||
xfb_info, bind_map);
|
||||
if (shader == NULL)
|
||||
return NULL;
|
||||
|
||||
struct vk_pipeline_cache_object *cached =
|
||||
vk_pipeline_cache_add_object(cache, &shader->base);
|
||||
|
||||
return container_of(cached, struct anv_shader_bin, base);
|
||||
}
|
||||
|
||||
#define SHA1_KEY_SIZE 20
|
||||
|
||||
struct nir_shader *
|
||||
anv_device_search_for_nir(struct anv_device *device,
|
||||
struct vk_pipeline_cache *cache,
|
||||
const nir_shader_compiler_options *nir_options,
|
||||
unsigned char sha1_key[SHA1_KEY_SIZE],
|
||||
void *mem_ctx)
|
||||
{
|
||||
if (cache == NULL)
|
||||
cache = device->default_pipeline_cache;
|
||||
|
||||
return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
|
||||
nir_options, NULL, mem_ctx);
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_upload_nir(struct anv_device *device,
|
||||
struct vk_pipeline_cache *cache,
|
||||
const struct nir_shader *nir,
|
||||
unsigned char sha1_key[SHA1_KEY_SIZE])
|
||||
{
|
||||
if (cache == NULL)
|
||||
cache = device->default_pipeline_cache;
|
||||
|
||||
vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
|
||||
}

4303
src/intel/vulkan_hasvk/anv_private.h
Normal file
File diff suppressed because it is too large

75
src/intel/vulkan_hasvk/anv_queue.c
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This file implements VkQueue
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue,
               uint32_t exec_flags,
               const VkDeviceQueueCreateInfo *pCreateInfo,
               uint32_t index_in_family)
{
   struct anv_physical_device *pdevice = device->physical;
   VkResult result;

   result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo,
                          index_in_family);
   if (result != VK_SUCCESS)
      return result;

   if (INTEL_DEBUG(DEBUG_SYNC)) {
      result = vk_sync_create(&device->vk,
                              &device->physical->sync_syncobj_type,
                              0, 0, &queue->sync);
      if (result != VK_SUCCESS) {
         vk_queue_finish(&queue->vk);
         return result;
      }
   }

   queue->vk.driver_submit = anv_queue_submit;

   queue->device = device;

   assert(queue->vk.queue_family_index < pdevice->queue.family_count);
   queue->family = &pdevice->queue.families[queue->vk.queue_family_index];

   queue->index_in_family = index_in_family;

   queue->exec_flags = exec_flags;

   return VK_SUCCESS;
}
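
/* Editorial note: the queue->sync object above only exists when running with
 * INTEL_DEBUG=sync; the submission path can then wait on it after each
 * submit so GPU work is serialized, which makes hangs and ordering issues
 * easier to bisect.
 */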

void
anv_queue_finish(struct anv_queue *queue)
{
   if (queue->sync)
      vk_sync_destroy(&queue->device->vk, queue->sync);

   vk_queue_finish(&queue->vk);
}

92
src/intel/vulkan_hasvk/anv_util.c
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "vk_enum_to_str.h"
|
||||
|
||||
void
|
||||
__anv_perf_warn(struct anv_device *device,
|
||||
const struct vk_object_base *object,
|
||||
const char *file, int line, const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
char buffer[256];
|
||||
|
||||
va_start(ap, format);
|
||||
vsnprintf(buffer, sizeof(buffer), format, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (object) {
|
||||
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
|
||||
VK_LOG_OBJS(object), file, line,
|
||||
"PERF: %s", buffer);
|
||||
} else {
|
||||
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
|
||||
VK_LOG_NO_OBJS(device->physical->instance), file, line,
|
||||
"PERF: %s", buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_dump_pipe_bits(enum anv_pipe_bits bits)
|
||||
{
|
||||
if (bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT)
|
||||
fputs("+depth_flush ", stderr);
|
||||
if (bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT)
|
||||
fputs("+dc_flush ", stderr);
|
||||
if (bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)
|
||||
fputs("+hdc_flush ", stderr);
|
||||
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
|
||||
fputs("+rt_flush ", stderr);
|
||||
if (bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
|
||||
fputs("+tile_flush ", stderr);
|
||||
if (bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT)
|
||||
fputs("+state_inval ", stderr);
|
||||
if (bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT)
|
||||
fputs("+const_inval ", stderr);
|
||||
if (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
|
||||
fputs("+vf_inval ", stderr);
|
||||
if (bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT)
|
||||
fputs("+tex_inval ", stderr);
|
||||
if (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
|
||||
fputs("+ic_inval ", stderr);
|
||||
if (bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT)
|
||||
fputs("+pb_stall ", stderr);
|
||||
if (bits & ANV_PIPE_PSS_STALL_SYNC_BIT)
|
||||
fputs("+pss_stall ", stderr);
|
||||
if (bits & ANV_PIPE_DEPTH_STALL_BIT)
|
||||
fputs("+depth_stall ", stderr);
|
||||
if (bits & ANV_PIPE_CS_STALL_BIT)
|
||||
fputs("+cs_stall ", stderr);
|
||||
if (bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)
|
||||
fputs("+utdp_flush", stderr);
|
||||
}
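
/* Editorial illustration: with the helper above, a pending-bits value of
 * (ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT) prints as
 * "+depth_flush +cs_stall " on stderr, one "+name " token per set bit in the
 * order of the checks above.
 */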

346
src/intel/vulkan_hasvk/anv_utrace.c
Normal file
@ -0,0 +1,346 @@
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "perf/intel_perf.h"
|
||||
|
||||
static uint32_t
|
||||
command_buffers_count_utraces(struct anv_device *device,
|
||||
uint32_t cmd_buffer_count,
|
||||
struct anv_cmd_buffer **cmd_buffers,
|
||||
uint32_t *utrace_copies)
|
||||
{
|
||||
if (!u_trace_context_actively_tracing(&device->ds.trace_context))
|
||||
return 0;
|
||||
|
||||
uint32_t utraces = 0;
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
if (u_trace_has_points(&cmd_buffers[i]->trace)) {
|
||||
utraces++;
|
||||
if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
|
||||
*utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
|
||||
}
|
||||
}
|
||||
|
||||
return utraces;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
|
||||
void *flush_data)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_utrace_flush_copy *flush = flush_data;
|
||||
|
||||
intel_ds_flush_data_fini(&flush->ds);
|
||||
|
||||
if (flush->trace_bo) {
|
||||
assert(flush->batch_bo);
|
||||
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
|
||||
anv_device_release_bo(device, flush->batch_bo);
|
||||
anv_device_release_bo(device, flush->trace_bo);
|
||||
}
|
||||
|
||||
vk_sync_destroy(&device->vk, flush->sync);
|
||||
|
||||
vk_free(&device->vk.alloc, flush);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
|
||||
void *cmdstream,
|
||||
void *ts_from, uint32_t from_offset,
|
||||
void *ts_to, uint32_t to_offset,
|
||||
uint32_t count)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_utrace_flush_copy *flush = cmdstream;
|
||||
struct anv_address from_addr = (struct anv_address) {
|
||||
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
|
||||
struct anv_address to_addr = (struct anv_address) {
|
||||
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
|
||||
|
||||
anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
|
||||
to_addr, from_addr, count * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
||||
uint32_t cmd_buffer_count,
|
||||
struct anv_cmd_buffer **cmd_buffers,
|
||||
struct anv_utrace_flush_copy **out_flush_data)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
uint32_t utrace_copies = 0;
|
||||
uint32_t utraces = command_buffers_count_utraces(device,
|
||||
cmd_buffer_count,
|
||||
cmd_buffers,
|
||||
&utrace_copies);
|
||||
if (!utraces) {
|
||||
*out_flush_data = NULL;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult result;
|
||||
struct anv_utrace_flush_copy *flush =
|
||||
vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!flush)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
|
||||
|
||||
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
|
||||
0, 0, &flush->sync);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_sync;
|
||||
|
||||
if (utrace_copies > 0) {
|
||||
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
|
||||
utrace_copies * 4096,
|
||||
&flush->trace_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_trace_buf;
|
||||
|
||||
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
|
||||
/* 128 dwords of setup + 64 dwords per copy */
|
||||
align_u32(512 + 64 * utrace_copies, 4096),
|
||||
&flush->batch_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_batch_buf;
|
||||
|
||||
result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_reloc_list;
|
||||
|
||||
flush->batch.alloc = &device->vk.alloc;
|
||||
flush->batch.relocs = &flush->relocs;
|
||||
anv_batch_set_storage(&flush->batch,
|
||||
(struct anv_address) { .bo = flush->batch_bo, },
|
||||
flush->batch_bo->map, flush->batch_bo->size);
|
||||
|
||||
/* Emit the copies */
|
||||
anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state,
|
||||
device,
|
||||
&flush->batch);
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
|
||||
u_trace_flush(&cmd_buffers[i]->trace, flush, false);
|
||||
} else {
|
||||
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
|
||||
u_trace_end_iterator(&cmd_buffers[i]->trace),
|
||||
&flush->ds.trace,
|
||||
flush,
|
||||
anv_device_utrace_emit_copy_ts_buffer);
|
||||
}
|
||||
}
|
||||
anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
|
||||
|
||||
u_trace_flush(&flush->ds.trace, flush, true);
|
||||
|
||||
if (flush->batch.status != VK_SUCCESS) {
|
||||
result = flush->batch.status;
|
||||
goto error_batch;
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
|
||||
u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
|
||||
}
|
||||
}
|
||||
|
||||
flush->queue = queue;
|
||||
|
||||
*out_flush_data = flush;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
error_batch:
|
||||
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
|
||||
error_reloc_list:
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
|
||||
error_batch_buf:
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
|
||||
error_trace_buf:
|
||||
vk_sync_destroy(&device->vk, flush->sync);
|
||||
error_sync:
|
||||
vk_free(&device->vk.alloc, flush);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void *
|
||||
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
|
||||
struct anv_bo *bo = NULL;
|
||||
UNUSED VkResult result =
|
||||
anv_bo_pool_alloc(&device->utrace_bo_pool,
|
||||
align_u32(size_b, 4096),
|
||||
&bo);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_bo *bo = timestamps;
|
||||
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, bo);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_record_ts(struct u_trace *ut, void *cs,
|
||||
void *timestamps, unsigned idx,
|
||||
bool end_of_pipe)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer =
|
||||
container_of(ut, struct anv_cmd_buffer, trace);
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_bo *bo = timestamps;
|
||||
|
||||
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
|
||||
(struct anv_address) {
|
||||
.bo = bo,
|
||||
.offset = idx * sizeof(uint64_t) },
|
||||
end_of_pipe);
|
||||
}
|
||||
|
||||
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
                   void *timestamps, unsigned idx, void *flush_data)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_bo *bo = timestamps;
   struct anv_utrace_flush_copy *flush = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      UNUSED VkResult result =
         vk_sync_wait(&device->vk,
                      flush->sync,
                      0,
                      VK_SYNC_WAIT_COMPLETE,
                      os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
      assert(result == VK_SUCCESS);
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return intel_device_info_timebase_scale(device->info, ts[idx]);
}
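
/* Editorial note: intel_device_info_timebase_scale() converts raw GPU
 * timestamp ticks into nanoseconds; conceptually, with a device-specific
 * timestamp frequency, the conversion is
 *
 *    ns = (1000000000ull * ticks) / devinfo->timestamp_frequency;
 */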
|
||||
|
||||
static const char *
|
||||
queue_family_to_name(const struct anv_queue_family *family)
|
||||
{
|
||||
switch (family->engine_class) {
|
||||
case I915_ENGINE_CLASS_RENDER:
|
||||
return "render";
|
||||
case I915_ENGINE_CLASS_COPY:
|
||||
return "copy";
|
||||
case I915_ENGINE_CLASS_VIDEO:
|
||||
return "video";
|
||||
case I915_ENGINE_CLASS_VIDEO_ENHANCE:
|
||||
return "video-enh";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_init(struct anv_device *device)
|
||||
{
|
||||
anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
|
||||
intel_ds_device_init(&device->ds, device->info, device->fd,
|
||||
device->physical->local_minor - 128,
|
||||
INTEL_DS_API_VULKAN);
|
||||
u_trace_context_init(&device->ds.trace_context,
|
||||
&device->ds,
|
||||
anv_utrace_create_ts_buffer,
|
||||
anv_utrace_destroy_ts_buffer,
|
||||
anv_utrace_record_ts,
|
||||
anv_utrace_read_ts,
|
||||
anv_utrace_delete_flush_data);
|
||||
|
||||
for (uint32_t q = 0; q < device->queue_count; q++) {
|
||||
struct anv_queue *queue = &device->queues[q];
|
||||
|
||||
queue->ds =
|
||||
intel_ds_device_add_queue(&device->ds, "%s%u",
|
||||
queue_family_to_name(queue->family),
|
||||
queue->index_in_family);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_finish(struct anv_device *device)
|
||||
{
|
||||
u_trace_context_process(&device->ds.trace_context, true);
|
||||
intel_ds_device_fini(&device->ds);
|
||||
anv_bo_pool_finish(&device->utrace_bo_pool);
|
||||
}
|
||||
|
||||
enum intel_ds_stall_flag
|
||||
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
|
||||
{
|
||||
static const struct {
|
||||
enum anv_pipe_bits anv;
|
||||
enum intel_ds_stall_flag ds;
|
||||
} anv_to_ds_flags[] = {
|
||||
{ .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
|
||||
{ .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
|
||||
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
|
||||
{ .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
|
||||
};
|
||||
|
||||
enum intel_ds_stall_flag ret = 0;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
|
||||
if (anv_to_ds_flags[i].anv & bits)
|
||||
ret |= anv_to_ds_flags[i].ds;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
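
/* Editorial illustration: the table above is a straight bit-for-bit
 * translation, so e.g.
 *
 *    anv_pipe_flush_bit_to_ds_stall_flag(ANV_PIPE_CS_STALL_BIT |
 *                                        ANV_PIPE_TILE_CACHE_FLUSH_BIT)
 *
 * yields INTEL_DS_CS_STALL_BIT | INTEL_DS_TILE_CACHE_FLUSH_BIT, and any bit
 * without an entry in the table is dropped from the result.
 */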

118
src/intel/vulkan_hasvk/anv_wsi.c
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "anv_measure.h"
|
||||
#include "wsi_common.h"
|
||||
#include "vk_fence.h"
|
||||
#include "vk_queue.h"
|
||||
#include "vk_semaphore.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
static PFN_vkVoidFunction
|
||||
anv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
|
||||
return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_init_wsi(struct anv_physical_device *physical_device)
|
||||
{
|
||||
VkResult result;
|
||||
|
||||
result = wsi_device_init(&physical_device->wsi_device,
|
||||
anv_physical_device_to_handle(physical_device),
|
||||
anv_wsi_proc_addr,
|
||||
&physical_device->instance->vk.alloc,
|
||||
physical_device->master_fd,
|
||||
&physical_device->instance->dri_options,
|
||||
false);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
physical_device->wsi_device.supports_modifiers = true;
|
||||
physical_device->wsi_device.signal_semaphore_with_memory = true;
|
||||
physical_device->wsi_device.signal_fence_with_memory = true;
|
||||
|
||||
physical_device->vk.wsi_device = &physical_device->wsi_device;
|
||||
|
||||
wsi_device_setup_syncobj_fd(&physical_device->wsi_device,
|
||||
physical_device->local_fd);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
anv_finish_wsi(struct anv_physical_device *physical_device)
|
||||
{
|
||||
physical_device->vk.wsi_device = NULL;
|
||||
wsi_device_finish(&physical_device->wsi_device,
|
||||
&physical_device->instance->vk.alloc);
|
||||
}
|
||||
|
||||
VkResult anv_AcquireNextImage2KHR(
|
||||
VkDevice _device,
|
||||
const VkAcquireNextImageInfoKHR *pAcquireInfo,
|
||||
uint32_t *pImageIndex)
|
||||
{
|
||||
VK_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
VkResult result =
|
||||
wsi_common_acquire_next_image2(&device->physical->wsi_device,
|
||||
_device, pAcquireInfo, pImageIndex);
|
||||
if (result == VK_SUCCESS)
|
||||
anv_measure_acquire(device);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult anv_QueuePresentKHR(
|
||||
VkQueue _queue,
|
||||
const VkPresentInfoKHR* pPresentInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_queue, queue, _queue);
|
||||
struct anv_device *device = queue->device;
|
||||
VkResult result;
|
||||
|
||||
if (device->debug_frame_desc) {
|
||||
device->debug_frame_desc->frame_id++;
|
||||
if (device->physical->memory.need_clflush) {
|
||||
intel_clflush_range(device->debug_frame_desc,
|
||||
sizeof(*device->debug_frame_desc));
|
||||
}
|
||||
}
|
||||
|
||||
result = vk_queue_wait_before_present(&queue->vk, pPresentInfo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = wsi_common_queue_present(&device->physical->wsi_device,
|
||||
anv_device_to_handle(queue->device),
|
||||
_queue, 0,
|
||||
pPresentInfo);
|
||||
|
||||
u_trace_context_process(&device->ds.trace_context, true);
|
||||
|
||||
return result;
|
||||
}

410
src/intel/vulkan_hasvk/genX_blorp_exec.c
Normal file
@ -0,0 +1,410 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "anv_measure.h"
|
||||
|
||||
/* These are defined in anv_private.h and blorp_genX_exec.h */
|
||||
#undef __gen_address_type
|
||||
#undef __gen_user_data
|
||||
#undef __gen_combine_address
|
||||
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "blorp/blorp_genX_exec.h"
|
||||
|
||||
#include "ds/intel_tracepoints.h"
|
||||
|
||||
static void blorp_measure_start(struct blorp_batch *_batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_intel_begin_blorp(&cmd_buffer->trace);
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
params->snapshot_type,
|
||||
NULL, 0);
|
||||
}
|
||||
|
||||
static void blorp_measure_end(struct blorp_batch *_batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_intel_end_blorp(&cmd_buffer->trace,
|
||||
params->x1 - params->x0,
|
||||
params->y1 - params->y0,
|
||||
params->hiz_op,
|
||||
params->fast_clear_op,
|
||||
params->shader_type,
|
||||
params->shader_pipeline);
|
||||
}
|
||||
|
||||
static void *
|
||||
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
return anv_batch_emit_dwords(&cmd_buffer->batch, n);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
blorp_emit_reloc(struct blorp_batch *batch,
|
||||
void *location, struct blorp_address address, uint32_t delta)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
assert(cmd_buffer->batch.start <= location &&
|
||||
location < cmd_buffer->batch.end);
|
||||
return anv_batch_emit_reloc(&cmd_buffer->batch, location,
|
||||
address.buffer, address.offset + delta);
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
|
||||
struct blorp_address address, uint32_t delta)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
VkResult result;
|
||||
|
||||
if (ANV_ALWAYS_SOFTPIN) {
|
||||
result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
|
||||
&cmd_buffer->vk.pool->alloc,
|
||||
address.buffer);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
anv_batch_set_error(&cmd_buffer->batch, result);
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t address_u64 = 0;
|
||||
result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
|
||||
&cmd_buffer->vk.pool->alloc,
|
||||
ss_offset, address.buffer,
|
||||
address.offset + delta,
|
||||
&address_u64);
|
||||
if (result != VK_SUCCESS)
|
||||
anv_batch_set_error(&cmd_buffer->batch, result);
|
||||
|
||||
void *dest = anv_block_pool_map(
|
||||
&cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
|
||||
write_reloc(cmd_buffer->device, dest, address_u64, false);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
blorp_get_surface_address(struct blorp_batch *blorp_batch,
|
||||
struct blorp_address address)
|
||||
{
|
||||
if (ANV_ALWAYS_SOFTPIN) {
|
||||
struct anv_address anv_addr = {
|
||||
.bo = address.buffer,
|
||||
.offset = address.offset,
|
||||
};
|
||||
return anv_address_physical(anv_addr);
|
||||
} else {
|
||||
/* We'll let blorp_surface_reloc write the address. */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VER >= 7 && GFX_VER < 10
|
||||
static struct blorp_address
|
||||
blorp_get_surface_base_address(struct blorp_batch *batch)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
return (struct blorp_address) {
|
||||
.buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
|
||||
.offset = 0,
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *
|
||||
blorp_alloc_dynamic_state(struct blorp_batch *batch,
|
||||
uint32_t size,
|
||||
uint32_t alignment,
|
||||
uint32_t *offset)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
struct anv_state state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
|
||||
|
||||
*offset = state.offset;
|
||||
return state.map;
|
||||
}
|
||||
|
||||
UNUSED static void *
|
||||
blorp_alloc_general_state(struct blorp_batch *batch,
|
||||
uint32_t size,
|
||||
uint32_t alignment,
|
||||
uint32_t *offset)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
struct anv_state state =
|
||||
anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
|
||||
alignment);
|
||||
|
||||
*offset = state.offset;
|
||||
return state.map;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
|
||||
unsigned state_size, unsigned state_alignment,
|
||||
uint32_t *bt_offset,
|
||||
uint32_t *surface_offsets, void **surface_maps)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
uint32_t state_offset;
|
||||
struct anv_state bt_state;
|
||||
|
||||
VkResult result =
|
||||
anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
|
||||
&state_offset, &bt_state);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
uint32_t *bt_map = bt_state.map;
|
||||
*bt_offset = bt_state.offset;
|
||||
|
||||
for (unsigned i = 0; i < num_entries; i++) {
|
||||
struct anv_state surface_state =
|
||||
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
|
||||
bt_map[i] = surface_state.offset + state_offset;
|
||||
surface_offsets[i] = surface_state.offset;
|
||||
surface_maps[i] = surface_state.map;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
|
||||
uint32_t offset)
|
||||
{
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void *
|
||||
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
|
||||
struct blorp_address *addr)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
struct anv_state vb_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);
|
||||
|
||||
*addr = (struct blorp_address) {
|
||||
.buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
|
||||
.offset = vb_state.offset,
|
||||
.mocs = isl_mocs(&cmd_buffer->device->isl_dev,
|
||||
ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
|
||||
};
|
||||
|
||||
return vb_state.map;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
|
||||
const struct blorp_address *addrs,
|
||||
uint32_t *sizes,
|
||||
unsigned num_vbs)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
for (unsigned i = 0; i < num_vbs; i++) {
|
||||
struct anv_address anv_addr = {
|
||||
.bo = addrs[i].buffer,
|
||||
.offset = addrs[i].offset,
|
||||
};
|
||||
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
|
||||
i, anv_addr, sizes[i]);
|
||||
}
|
||||
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
/* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
|
||||
* really matter for blorp because we never call apply_pipe_flushes after
|
||||
* this point.
|
||||
*/
|
||||
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
|
||||
(1 << num_vbs) - 1);
|
||||
}
|
||||
|
||||
UNUSED static struct blorp_address
|
||||
blorp_get_workaround_address(struct blorp_batch *batch)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
return (struct blorp_address) {
|
||||
.buffer = cmd_buffer->device->workaround_address.bo,
|
||||
.offset = cmd_buffer->device->workaround_address.offset,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
|
||||
{
|
||||
/* We don't need to flush states anymore, since everything will be snooped.
|
||||
*/
|
||||
}
|
||||
|
||||
static const struct intel_l3_config *
|
||||
blorp_get_l3_config(struct blorp_batch *batch)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
return cmd_buffer->state.current_l3_config;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_exec_on_render(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);
|
||||
|
||||
const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
|
||||
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
|
||||
params->y1 - params->y0, scale);
|
||||
|
||||
#if GFX_VER >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
* be set in this packet."
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
|
||||
"before blorp BTI change");
|
||||
#endif
|
||||
|
||||
if (params->depth.enabled &&
|
||||
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
|
||||
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, ¶ms->depth.surf);
|
||||
|
||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||
|
||||
/* Apply any outstanding flushes in case pipeline select haven't. */
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);
|
||||
|
||||
/* BLORP doesn't do anything fancy with depth such as discards, so we want
|
||||
* the PMA fix off. Also, off is always the safe option.
|
||||
*/
|
||||
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
|
||||
|
||||
blorp_exec(batch, params);
|
||||
|
||||
#if GFX_VER >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
* be set in this packet."
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
|
||||
"after blorp BTI change");
|
||||
#endif
|
||||
|
||||
/* Calculate state that does not get touched by blorp.
|
||||
* Flush everything else.
|
||||
*/
|
||||
anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
|
||||
ANV_CMD_DIRTY_XFB_ENABLE);
|
||||
|
||||
BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
|
||||
BITSET_ONES(dyn_dirty);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
|
||||
if (!params->wm_prog_data) {
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
|
||||
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
|
||||
}
|
||||
|
||||
cmd_buffer->state.gfx.vb_dirty = ~0;
|
||||
cmd_buffer->state.gfx.dirty |= dirty;
|
||||
BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
|
||||
cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
|
||||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_exec_on_compute(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
assert(batch->flags & BLORP_BATCH_USE_COMPUTE);
|
||||
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
|
||||
|
||||
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
|
||||
|
||||
/* Apply any outstanding flushes in case pipeline select haven't. */
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
blorp_exec(batch, params);
|
||||
|
||||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
}
|
||||
|
||||
void
|
||||
genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
|
||||
if (!cmd_buffer->state.current_l3_config) {
|
||||
const struct intel_l3_config *cfg =
|
||||
intel_get_default_l3_config(cmd_buffer->device->info);
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
|
||||
* indirect fast-clear colors can cause GPU hangs if we don't stall first.
|
||||
* See genX(cmd_buffer_mi_memcpy) for more details.
|
||||
*/
|
||||
if (params->src.clear_color_addr.buffer ||
|
||||
params->dst.clear_color_addr.buffer) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_CS_STALL_BIT,
|
||||
"before blorp prep fast clear");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
|
||||
blorp_exec_on_compute(batch, params);
|
||||
else
|
||||
blorp_exec_on_render(batch, params);
|
||||
}

7488
src/intel/vulkan_hasvk/genX_cmd_buffer.c
Normal file
File diff suppressed because it is too large

324
src/intel/vulkan_hasvk/genX_gpu_memcpy.c
Normal file
@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
|
||||
#include "common/intel_l3_config.h"
|
||||
|
||||
/**
|
||||
* This file implements some lightweight memcpy/memset operations on the GPU
|
||||
* using a vertex buffer and streamout.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns the greatest common divisor of a and b that is a power of two.
|
||||
*/
|
||||
static uint64_t
|
||||
gcd_pow2_u64(uint64_t a, uint64_t b)
|
||||
{
|
||||
assert(a > 0 || b > 0);
|
||||
|
||||
unsigned a_log2 = ffsll(a) - 1;
|
||||
unsigned b_log2 = ffsll(b) - 1;
|
||||
|
||||
/* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
|
||||
* case, the MIN2() will take the other one. If both are 0 then we will
|
||||
* hit the assert above.
|
||||
*/
|
||||
return 1 << MIN2(a_log2, b_log2);
|
||||
}
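
A quick way to sanity-check the block-size selection above is a tiny host-side test. This is a hypothetical standalone sketch, not part of the driver or of this commit: it mirrors gcd_pow2_u64() (using the __builtin_ffsll compiler builtin rather than ffsll so it builds without extra feature macros) and prints the block size that emit_so_memcpy() further down would pick for a few transfer sizes.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Mirror of the function above: largest power of two dividing both a and b. */
static uint64_t
gcd_pow2_u64_sketch(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);
   unsigned a_log2 = __builtin_ffsll(a) - 1;
   unsigned b_log2 = __builtin_ffsll(b) - 1;
   return 1ull << MIN2(a_log2, b_log2);
}

int
main(void)
{
   printf("%llu\n", (unsigned long long)gcd_pow2_u64_sketch(16, 64)); /* 16 -> R32G32B32A32_UINT */
   printf("%llu\n", (unsigned long long)gcd_pow2_u64_sketch(16, 24)); /*  8 -> R32G32_UINT */
   printf("%llu\n", (unsigned long long)gcd_pow2_u64_sketch(16, 20)); /*  4 -> R32_UINT */
   return 0;
}
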
|
||||
|
||||
static void
|
||||
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
||||
const struct intel_l3_config *l3_config)
|
||||
{
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||
vfi.InstancingEnable = false;
|
||||
vfi.VertexElementIndex = 0;
|
||||
}
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
|
||||
#endif
|
||||
|
||||
/* Disable all shader stages */
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_TE), te);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_DS), ds);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS), ps);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
|
||||
sbe.VertexURBEntryReadOffset = 1;
|
||||
sbe.NumberofSFOutputAttributes = 1;
|
||||
sbe.VertexURBEntryReadLength = 1;
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 9
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Emit URB setup. We tell it that the VS is active because we want it to
|
||||
* allocate space for the VS. Even though one isn't run, we need VUEs to
|
||||
* store the data that VF is going to pass to SOL.
|
||||
*/
|
||||
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
|
||||
|
||||
genX(emit_urb_setup)(device, batch, l3_config,
|
||||
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
/* Disable Primitive Replication. */
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
|
||||
topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
|
||||
vf.StatisticsEnable = false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size)
|
||||
{
|
||||
/* The maximum copy block size is 4 32-bit components at a time. */
|
||||
assert(size % 4 == 0);
|
||||
unsigned bs = gcd_pow2_u64(16, size);
|
||||
|
||||
enum isl_format format;
|
||||
switch (bs) {
|
||||
case 4: format = ISL_FORMAT_R32_UINT; break;
|
||||
case 8: format = ISL_FORMAT_R32G32_UINT; break;
|
||||
case 16: format = ISL_FORMAT_R32G32B32A32_UINT; break;
|
||||
default:
|
||||
unreachable("Invalid size");
|
||||
}
|
||||
|
||||
uint32_t *dw;
|
||||
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_VERTEX_BUFFERS));
|
||||
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1,
|
||||
&(struct GENX(VERTEX_BUFFER_STATE)) {
|
||||
.VertexBufferIndex = 32, /* Reserved for this */
|
||||
.AddressModifyEnable = true,
|
||||
.BufferStartingAddress = src,
|
||||
.BufferPitch = bs,
|
||||
.MOCS = anv_mocs(device, src.bo, 0),
|
||||
#if GFX_VER >= 12
|
||||
.L3BypassDisable = true,
|
||||
#endif
|
||||
#if (GFX_VER >= 8)
|
||||
.BufferSize = size,
|
||||
#else
|
||||
.EndAddress = anv_address_add(src, size - 1),
|
||||
#endif
|
||||
});
|
||||
|
||||
dw = anv_batch_emitn(batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
|
||||
GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw + 1,
|
||||
&(struct GENX(VERTEX_ELEMENT_STATE)) {
|
||||
.VertexBufferIndex = 32,
|
||||
.Valid = true,
|
||||
.SourceElementFormat = format,
|
||||
.SourceElementOffset = 0,
|
||||
.Component0Control = (bs >= 4) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
|
||||
.Component1Control = (bs >= 8) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
|
||||
.Component2Control = (bs >= 12) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
|
||||
.Component3Control = (bs >= 16) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
|
||||
});
|
||||
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) {
|
||||
#if GFX_VER < 12
|
||||
sob.SOBufferIndex = 0;
|
||||
#else
|
||||
sob._3DCommandOpcode = 0;
|
||||
sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD;
|
||||
#endif
|
||||
sob.MOCS = anv_mocs(device, dst.bo, 0);
|
||||
sob.SurfaceBaseAddress = dst;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
sob.SOBufferEnable = true;
|
||||
sob.SurfaceSize = size / 4 - 1;
|
||||
#else
|
||||
sob.SurfacePitch = bs;
|
||||
sob.SurfaceEndAddress = anv_address_add(dst, size);
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
|
||||
* the end position of the stream. We need to reset this value to 0 at
|
||||
* the beginning of the run or else SOL will start at the offset from
|
||||
* the previous draw.
|
||||
*/
|
||||
sob.StreamOffsetWriteEnable = true;
|
||||
sob.StreamOffset = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER <= 7
|
||||
/* The hardware can do this for us on BDW+ (see above) */
|
||||
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), load) {
|
||||
load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
|
||||
load.DataDWord = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
|
||||
.StreamtoBufferSelects0 = (1 << 0),
|
||||
.NumEntries0 = 1);
|
||||
GENX(SO_DECL_ENTRY_pack)(batch, dw + 3,
|
||||
&(struct GENX(SO_DECL_ENTRY)) {
|
||||
.Stream0Decl = {
|
||||
.OutputBufferSlot = 0,
|
||||
.RegisterIndex = 0,
|
||||
.ComponentMask = (1 << (bs / 4)) - 1,
|
||||
},
|
||||
});
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) {
|
||||
so.SOFunctionEnable = true;
|
||||
so.RenderingDisable = true;
|
||||
so.Stream0VertexReadOffset = 0;
|
||||
so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
|
||||
#if GFX_VER >= 8
|
||||
so.Buffer0SurfacePitch = bs;
|
||||
#else
|
||||
so.SOBufferEnable0 = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||
prim.VertexAccessType = SEQUENTIAL;
|
||||
prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
|
||||
prim.VertexCountPerInstance = size / bs;
|
||||
prim.StartVertexLocation = 0;
|
||||
prim.InstanceCount = 1;
|
||||
prim.StartInstanceLocation = 0;
|
||||
prim.BaseVertexLocation = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
|
||||
struct anv_device *device,
|
||||
struct anv_batch *batch)
|
||||
{
|
||||
memset(state, 0, sizeof(*state));
|
||||
|
||||
state->batch = batch;
|
||||
state->device = device;
|
||||
|
||||
const struct intel_l3_config *cfg = intel_get_default_l3_config(device->info);
|
||||
genX(emit_l3_config)(batch, device, cfg);
|
||||
|
||||
anv_batch_emit(batch, GENX(PIPELINE_SELECT), ps) {
|
||||
#if GFX_VER >= 9
|
||||
ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
|
||||
ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
|
||||
#endif
|
||||
ps.PipelineSelection = _3D;
|
||||
}
|
||||
|
||||
emit_common_so_memcpy(batch, device, cfg);
|
||||
}
|
||||
|
||||
void
|
||||
genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
|
||||
{
|
||||
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
|
||||
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
|
||||
|
||||
if ((state->batch->next - state->batch->start) & 4)
|
||||
anv_batch_emit(state->batch, GENX(MI_NOOP), noop);
|
||||
}
|
||||
|
||||
void
|
||||
genX(emit_so_memcpy)(struct anv_memcpy_state *state,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size)
|
||||
{
|
||||
if (GFX_VER >= 8 && GFX_VER <= 9 &&
|
||||
!anv_use_relocations(state->device->physical) &&
|
||||
anv_gfx8_9_vb_cache_range_needs_workaround(&state->vb_bound,
|
||||
&state->vb_dirty,
|
||||
src, size)) {
|
||||
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_VF_CACHE_INVALIDATE_BIT);
|
||||
memset(&state->vb_dirty, 0, sizeof(state->vb_dirty));
|
||||
}
|
||||
|
||||
emit_so_memcpy(state->batch, state->device, dst, src, size);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
if (!cmd_buffer->state.current_l3_config) {
|
||||
const struct intel_l3_config *cfg =
|
||||
intel_get_default_l3_config(cmd_buffer->device->info);
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
|
||||
}
|
||||
|
||||
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 32, src, size);
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||
|
||||
emit_common_so_memcpy(&cmd_buffer->batch, cmd_buffer->device,
|
||||
cmd_buffer->state.current_l3_config);
|
||||
emit_so_memcpy(&cmd_buffer->batch, cmd_buffer->device, dst, src, size);
|
||||
|
||||
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
|
||||
1ull << 32);
|
||||
|
||||
/* Invalidate pipeline & raster discard since we touch
|
||||
* 3DSTATE_STREAMOUT.
|
||||
*/
|
||||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
|
||||
BITSET_SET(cmd_buffer->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
|
||||
}
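
For context, the three entry points above (init, copy, fini) are designed to be used together when a standalone copy batch is built outside of a command buffer. A rough caller sketch, hypothetical and assuming an already-initialized anv_batch plus valid anv_address values (none of this is code from the commit itself):

   struct anv_memcpy_state memcpy_state;
   genX(emit_so_memcpy_init)(&memcpy_state, device, &batch);
   /* The size must be a multiple of 4 bytes, per the assert in emit_so_memcpy(). */
   genX(emit_so_memcpy)(&memcpy_state, dst_addr, src_addr, 64);
   genX(emit_so_memcpy_fini)(&memcpy_state);
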
|
2563
src/intel/vulkan_hasvk/genX_pipeline.c
Normal file
File diff suppressed because it is too large
1530
src/intel/vulkan_hasvk/genX_query.c
Normal file
File diff suppressed because it is too large
1141
src/intel/vulkan_hasvk/genX_state.c
Normal file
File diff suppressed because it is too large
314
src/intel/vulkan_hasvk/gfx7_cmd_buffer.c
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
|
||||
static uint32_t
|
||||
get_depth_format(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
|
||||
|
||||
switch (gfx->depth_att.vk_format) {
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
case VK_FORMAT_D16_UNORM_S8_UINT:
|
||||
return D16_UNORM;
|
||||
|
||||
case VK_FORMAT_X8_D24_UNORM_PACK32:
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
return D24_UNORM_X8_UINT;
|
||||
|
||||
case VK_FORMAT_D32_SFLOAT:
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
return D32_FLOAT;
|
||||
|
||||
default:
|
||||
return D16_UNORM;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
const struct vk_dynamic_graphics_state *dyn =
|
||||
&cmd_buffer->vk.dynamic_graphics_state;
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
|
||||
/* Take dynamic primitive topology into account with
|
||||
* 3DSTATE_SF::MultisampleRasterizationMode
|
||||
*/
|
||||
VkPolygonMode dynamic_raster_mode =
|
||||
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
|
||||
dyn->ia.primitive_topology);
|
||||
uint32_t ms_rast_mode =
|
||||
genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode);
|
||||
|
||||
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
|
||||
pipeline->line_mode);
|
||||
|
||||
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
|
||||
struct GENX(3DSTATE_SF) sf = {
|
||||
GENX(3DSTATE_SF_header),
|
||||
.DepthBufferSurfaceFormat = get_depth_format(cmd_buffer),
|
||||
.LineWidth = dyn->rs.line.width,
|
||||
.AntialiasingEnable = aa_enable,
|
||||
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
|
||||
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
|
||||
.MultisampleRasterizationMode = ms_rast_mode,
|
||||
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
|
||||
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
|
||||
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
|
||||
};
|
||||
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
|
||||
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx7.sf);
|
||||
}
|
||||
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
GENX(COLOR_CALC_STATE_length) * 4,
|
||||
64);
|
||||
struct GENX(COLOR_CALC_STATE) cc = {
|
||||
.BlendConstantColorRed = dyn->cb.blend_constants[0],
|
||||
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
|
||||
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
|
||||
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
|
||||
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
|
||||
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
|
||||
};
|
||||
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
|
||||
ccp.ColorCalcStatePointer = cc_state.offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
|
||||
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
|
||||
ls.LineStippleInverseRepeatCount =
|
||||
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
|
||||
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
|
||||
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
|
||||
|
||||
VkImageAspectFlags ds_aspects = 0;
|
||||
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
struct vk_depth_stencil_state opt_ds = dyn->ds;
|
||||
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
|
||||
|
||||
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
|
||||
.DoubleSidedStencilEnable = true,
|
||||
|
||||
.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff,
|
||||
.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff,
|
||||
|
||||
.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff,
|
||||
.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff,
|
||||
|
||||
.DepthTestEnable = opt_ds.depth.test_enable,
|
||||
.DepthBufferWriteEnable = opt_ds.depth.write_enable,
|
||||
.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op],
|
||||
.StencilTestEnable = opt_ds.stencil.test_enable,
|
||||
.StencilBufferWriteEnable = opt_ds.stencil.write_enable,
|
||||
.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail],
|
||||
.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass],
|
||||
.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail],
|
||||
.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare],
|
||||
.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail],
|
||||
.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass],
|
||||
.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail],
|
||||
.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare],
|
||||
};
|
||||
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
|
||||
|
||||
struct anv_state ds_state =
|
||||
anv_cmd_buffer_emit_dynamic(cmd_buffer, depth_stencil_dw,
|
||||
sizeof(depth_stencil_dw), 64);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch,
|
||||
GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) {
|
||||
dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.gfx.index_buffer &&
|
||||
((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))) {
|
||||
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
|
||||
uint32_t offset = cmd_buffer->state.gfx.index_offset;
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
anv_batch_emit(&cmd_buffer->batch, GFX75_3DSTATE_VF, vf) {
|
||||
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
|
||||
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
|
||||
#if GFX_VERx10 != 75
|
||||
ib.CutIndexEnable = dyn->ia.primitive_restart_enable;
|
||||
#endif
|
||||
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
|
||||
ib.MOCS = anv_mocs(cmd_buffer->device,
|
||||
buffer->address.bo,
|
||||
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
|
||||
|
||||
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
|
||||
ib.BufferEndingAddress = anv_address_add(buffer->address,
|
||||
buffer->vk.size);
|
||||
}
|
||||
}
|
||||
|
||||
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
|
||||
* threads or if we have dirty dynamic primitive topology state and
|
||||
* need to toggle 3DSTATE_WM::MultisampleRasterizationMode dynamically.
|
||||
*/
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
||||
VkPolygonMode dynamic_raster_mode =
|
||||
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
|
||||
dyn->ia.primitive_topology);
|
||||
|
||||
uint32_t dwords[GENX(3DSTATE_WM_length)];
|
||||
struct GENX(3DSTATE_WM) wm = {
|
||||
GENX(3DSTATE_WM_header),
|
||||
|
||||
.ThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
|
||||
(pipeline->force_fragment_thread_dispatch ||
|
||||
!anv_cmd_buffer_all_color_write_masked(cmd_buffer)),
|
||||
.MultisampleRasterizationMode =
|
||||
genX(ms_rasterization_mode)(pipeline,
|
||||
dynamic_raster_mode),
|
||||
};
|
||||
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
|
||||
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) {
|
||||
const uint32_t samples = MAX2(1, cmd_buffer->state.gfx.samples);
|
||||
const struct vk_sample_locations_state *sl = dyn->ms.sample_locations;
|
||||
genX(emit_multisample)(&cmd_buffer->batch, samples,
|
||||
sl->per_pixel == samples ? sl : NULL);
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
||||
const uint8_t color_writes = dyn->cb.color_write_enables;
|
||||
|
||||
/* Blend states of each RT */
|
||||
uint32_t blend_dws[GENX(BLEND_STATE_length) +
|
||||
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
|
||||
uint32_t *dws = blend_dws;
|
||||
memset(blend_dws, 0, sizeof(blend_dws));
|
||||
|
||||
/* Skip this part */
|
||||
dws += GENX(BLEND_STATE_length);
|
||||
|
||||
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
||||
/* Disable anything above the current number of color attachments. */
|
||||
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
|
||||
(color_writes & BITFIELD_BIT(i)) == 0;
|
||||
struct GENX(BLEND_STATE_ENTRY) entry = {
|
||||
.WriteDisableAlpha = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_A_BIT) == 0,
|
||||
.WriteDisableRed = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_R_BIT) == 0,
|
||||
.WriteDisableGreen = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_G_BIT) == 0,
|
||||
.WriteDisableBlue = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_B_BIT) == 0,
|
||||
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
|
||||
};
|
||||
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
|
||||
dws += GENX(BLEND_STATE_ENTRY_length);
|
||||
}
|
||||
|
||||
uint32_t num_dwords = GENX(BLEND_STATE_length) +
|
||||
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
|
||||
|
||||
struct anv_state blend_states =
|
||||
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
|
||||
pipeline->gfx7.blend_state, num_dwords, 64);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
|
||||
bsp.BlendStatePointer = blend_states.offset;
|
||||
}
|
||||
}
|
||||
|
||||
/* When we're done, there is no more dirty gfx state. */
|
||||
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
|
||||
cmd_buffer->state.gfx.dirty = 0;
|
||||
}
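
The anv_batch_emit_merge() calls in this function combine dwords packed from dynamic state (sf_dw, dwords) with the partial dwords precomputed at pipeline-creation time (pipeline->gfx7.sf, pipeline->gfx7.wm). Conceptually the merge is a per-dword OR, roughly as in the sketch below; this is only an illustration of the idea, not the macro body itself:

   /* Each field is packed by exactly one of the two sources, so OR-ing the
    * dword arrays reconstructs the complete instruction before it is copied
    * into the batch.
    */
   uint32_t merged[GENX(3DSTATE_SF_length)];
   for (unsigned i = 0; i < GENX(3DSTATE_SF_length); i++)
      merged[i] = sf_dw[i] | pipeline->gfx7.sf[i];
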
|
||||
|
||||
void
|
||||
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
|
||||
bool enable)
|
||||
{
|
||||
/* The NP PMA fix doesn't exist on gfx7 */
|
||||
}
|
706
src/intel/vulkan_hasvk/gfx8_cmd_buffer.c
Normal file
@ -0,0 +1,706 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
|
||||
void
|
||||
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
|
||||
{
|
||||
if (cmd_buffer->state.pma_fix_enabled == enable)
|
||||
return;
|
||||
|
||||
cmd_buffer->state.pma_fix_enabled = enable;
|
||||
|
||||
/* According to the Broadwell PIPE_CONTROL documentation, software should
|
||||
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
|
||||
* prior to the LRI. If stencil buffer writes are enabled, then a Render
|
||||
* Cache Flush is also necessary.
|
||||
*
|
||||
* The Skylake docs say to use a depth stall rather than a command
|
||||
* streamer stall. However, the hardware seems to violently disagree.
|
||||
* A full command streamer stall seems to be needed in both cases.
|
||||
*/
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DepthCacheFlushEnable = true;
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
pc.RenderTargetCacheFlushEnable = true;
|
||||
#if GFX_VER >= 12
|
||||
pc.TileCacheFlushEnable = true;
|
||||
|
||||
/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
|
||||
* be set with any PIPE_CONTROL with Depth Flush Enable bit set."
|
||||
*/
|
||||
pc.DepthStallEnable = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER == 9
|
||||
|
||||
uint32_t cache_mode;
|
||||
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
|
||||
.STCPMAOptimizationEnable = enable,
|
||||
.STCPMAOptimizationEnableMask = true);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
|
||||
lri.RegisterOffset = GENX(CACHE_MODE_0_num);
|
||||
lri.DataDWord = cache_mode;
|
||||
}
|
||||
|
||||
#elif GFX_VER == 8
|
||||
|
||||
uint32_t cache_mode;
|
||||
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
|
||||
.NPPMAFixEnable = enable,
|
||||
.NPEarlyZFailsDisable = enable,
|
||||
.NPPMAFixEnableMask = true,
|
||||
.NPEarlyZFailsDisableMask = true);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
|
||||
lri.RegisterOffset = GENX(CACHE_MODE_1_num);
|
||||
lri.DataDWord = cache_mode;
|
||||
}
|
||||
|
||||
#endif /* GFX_VER == 8 */
|
||||
|
||||
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
|
||||
* Flush bits is often necessary. We do it regardless because it's easier.
|
||||
* The render cache flush is also necessary if stencil writes are enabled.
|
||||
*
|
||||
* Again, the Skylake docs give a different set of flushes but the BDW
|
||||
* flushes seem to work just as well.
|
||||
*/
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DepthStallEnable = true;
|
||||
pc.DepthCacheFlushEnable = true;
|
||||
pc.RenderTargetCacheFlushEnable = true;
|
||||
#if GFX_VER >= 12
|
||||
pc.TileCacheFlushEnable = true;
|
||||
#endif
|
||||
}
|
||||
}
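
The CACHE_MODE_0/CACHE_MODE_1 writes above work because these are masked registers: bits 31:16 of the MI_LOAD_REGISTER_IMM payload are per-bit write enables for bits 15:0, which is what the *Mask fields passed to anv_pack_struct() encode. An illustrative hand-rolled equivalent, assuming that usual masked-register layout (this helper is not part of the commit):

   /* Write-enable mask in the high half, new bit values in the low half;
    * bits whose mask bit is clear keep their current hardware state.
    */
   static inline uint32_t
   masked_reg_write(uint32_t bits, uint32_t mask)
   {
      return (mask << 16) | (bits & mask);
   }
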
|
||||
|
||||
UNUSED static bool
|
||||
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct vk_depth_stencil_state *ds)
|
||||
{
|
||||
assert(GFX_VER == 8);
|
||||
|
||||
/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
|
||||
*
|
||||
* SW must set this bit in order to enable this fix when following
|
||||
* expression is TRUE.
|
||||
*
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
|
||||
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
|
||||
* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
|
||||
* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
|
||||
* (3DSTATE_PS_EXTRA::PixelShaderValid) &&
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
|
||||
* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
|
||||
* 3DSTATE_WM::ForceKillPix != ForceOff &&
|
||||
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
|
||||
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
|
||||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
|
||||
*/
|
||||
|
||||
/* These are always true:
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
|
||||
*/
|
||||
|
||||
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
|
||||
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
|
||||
* and there is no harm.
|
||||
*
|
||||
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
|
||||
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
|
||||
*/
|
||||
if (!cmd_buffer->state.hiz_enabled)
|
||||
return false;
|
||||
|
||||
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
|
||||
return false;
|
||||
|
||||
/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
if (wm_prog_data->early_fragment_tests)
|
||||
return false;
|
||||
|
||||
/* We never use anv_pipeline for HiZ ops so this is trivially true:
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
|
||||
*/
|
||||
|
||||
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
|
||||
if (!ds->depth.test_enable)
|
||||
return false;
|
||||
|
||||
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
|
||||
* 3DSTATE_WM::ForceKillPix != ForceOff &&
|
||||
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
|
||||
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
|
||||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
|
||||
*/
|
||||
return (pipeline->kill_pixel && (ds->depth.write_enable ||
|
||||
ds->stencil.write_enable)) ||
|
||||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
|
||||
}
|
||||
|
||||
UNUSED static bool
|
||||
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct vk_depth_stencil_state *ds)
|
||||
{
|
||||
if (GFX_VER > 9)
|
||||
return false;
|
||||
assert(GFX_VER == 9);
|
||||
|
||||
/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
|
||||
*
|
||||
* Clearing this bit will force the STC cache to wait for pending
|
||||
* retirement of pixels at the HZ-read stage and do the STC-test for
|
||||
* Non-promoted, R-computed and Computed depth modes instead of
|
||||
* postponing the STC-test to RCPFE.
|
||||
*
|
||||
* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
|
||||
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
|
||||
*
|
||||
* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
|
||||
*
|
||||
* COMP_STC_EN = STC_TEST_EN &&
|
||||
* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
|
||||
*
|
||||
* SW parses the pipeline states to generate the following logical
|
||||
* signal indicating if PMA FIX can be enabled.
|
||||
*
|
||||
* STC_PMA_OPT =
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
|
||||
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
|
||||
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
|
||||
* !(3DSTATE_WM::EDSC_Mode == 2) &&
|
||||
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
|
||||
* (COMP_STC_EN || STC_WRITE_EN) &&
|
||||
* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_WM::ForceKillPix == ON ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
|
||||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
|
||||
*/
|
||||
|
||||
/* These are always true:
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
|
||||
*/
|
||||
|
||||
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
|
||||
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
|
||||
* and there is no harm.
|
||||
*
|
||||
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
|
||||
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
|
||||
*/
|
||||
if (!cmd_buffer->state.hiz_enabled)
|
||||
return false;
|
||||
|
||||
/* We can't possibly know if HiZ is enabled without the depth attachment */
|
||||
ASSERTED const struct anv_image_view *d_iview =
|
||||
cmd_buffer->state.gfx.depth_att.iview;
|
||||
assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
|
||||
|
||||
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
|
||||
return false;
|
||||
|
||||
/* !(3DSTATE_WM::EDSC_Mode == 2) */
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
if (wm_prog_data->early_fragment_tests)
|
||||
return false;
|
||||
|
||||
/* We never use anv_pipeline for HiZ ops so this is trivially true:
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
|
||||
*/
|
||||
|
||||
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
|
||||
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
|
||||
*/
|
||||
const bool stc_test_en = ds->stencil.test_enable;
|
||||
|
||||
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
|
||||
*/
|
||||
const bool stc_write_en = ds->stencil.write_enable;
|
||||
|
||||
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
|
||||
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
|
||||
|
||||
/* COMP_STC_EN || STC_WRITE_EN */
|
||||
if (!(comp_stc_en || stc_write_en))
|
||||
return false;
|
||||
|
||||
/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_WM::ForceKillPix == ON ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
|
||||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
|
||||
*/
|
||||
return pipeline->kill_pixel ||
|
||||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
const struct vk_dynamic_graphics_state *dyn =
|
||||
&cmd_buffer->vk.dynamic_graphics_state;
|
||||
|
||||
#if GFX_VER >= 11
|
||||
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
|
||||
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
|
||||
#endif /* GFX_VER >= 11 */
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
|
||||
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
|
||||
struct GENX(3DSTATE_SF) sf = {
|
||||
GENX(3DSTATE_SF_header),
|
||||
};
|
||||
#if GFX_VER == 8
|
||||
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
|
||||
sf.CHVLineWidth = dyn->rs.line.width;
|
||||
} else {
|
||||
sf.LineWidth = dyn->rs.line.width;
|
||||
}
|
||||
#else
|
||||
sf.LineWidth = dyn->rs.line.width;
|
||||
#endif
|
||||
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
|
||||
/* Take dynamic primitive topology into account with
|
||||
* 3DSTATE_RASTER::APIMode
|
||||
* 3DSTATE_RASTER::DXMultisampleRasterizationEnable
|
||||
* 3DSTATE_RASTER::AntialiasingEnable
|
||||
*/
|
||||
uint32_t api_mode = 0;
|
||||
bool msaa_raster_enable = false;
|
||||
|
||||
VkPolygonMode dynamic_raster_mode =
|
||||
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
|
||||
dyn->ia.primitive_topology);
|
||||
|
||||
genX(rasterization_mode)(dynamic_raster_mode,
|
||||
pipeline->line_mode, dyn->rs.line.width,
|
||||
&api_mode, &msaa_raster_enable);
|
||||
|
||||
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
|
||||
pipeline->line_mode);
|
||||
|
||||
uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
|
||||
struct GENX(3DSTATE_RASTER) raster = {
|
||||
GENX(3DSTATE_RASTER_header),
|
||||
.APIMode = api_mode,
|
||||
.DXMultisampleRasterizationEnable = msaa_raster_enable,
|
||||
.AntialiasingEnable = aa_enable,
|
||||
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
|
||||
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
|
||||
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
|
||||
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
|
||||
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
|
||||
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
|
||||
};
|
||||
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
|
||||
pipeline->gfx8.raster);
|
||||
}
|
||||
|
||||
/* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
|
||||
* 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits gets split
|
||||
* across different state packets for gfx8 and gfx9. We handle that by
|
||||
* using a big old #if switch here.
|
||||
*/
|
||||
#if GFX_VER == 8
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
GENX(COLOR_CALC_STATE_length) * 4,
|
||||
64);
|
||||
struct GENX(COLOR_CALC_STATE) cc = {
|
||||
.BlendConstantColorRed = dyn->cb.blend_constants[0],
|
||||
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
|
||||
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
|
||||
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
|
||||
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
|
||||
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
|
||||
};
|
||||
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
|
||||
ccp.ColorCalcStatePointer = cc_state.offset;
|
||||
ccp.ColorCalcStatePointerValid = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
|
||||
VkImageAspectFlags ds_aspects = 0;
|
||||
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
struct vk_depth_stencil_state opt_ds = dyn->ds;
|
||||
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
|
||||
ds.DoubleSidedStencilEnable = true;
|
||||
|
||||
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
|
||||
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
|
||||
|
||||
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
|
||||
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
|
||||
|
||||
ds.DepthTestEnable = opt_ds.depth.test_enable;
|
||||
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
|
||||
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
|
||||
ds.StencilTestEnable = opt_ds.stencil.test_enable;
|
||||
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
|
||||
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
|
||||
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
|
||||
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
|
||||
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
|
||||
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
|
||||
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
|
||||
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
|
||||
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
|
||||
}
|
||||
|
||||
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
|
||||
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
|
||||
}
|
||||
#else
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
GENX(COLOR_CALC_STATE_length) * 4,
|
||||
64);
|
||||
struct GENX(COLOR_CALC_STATE) cc = {
|
||||
.BlendConstantColorRed = dyn->cb.blend_constants[0],
|
||||
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
|
||||
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
|
||||
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
|
||||
};
|
||||
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
|
||||
ccp.ColorCalcStatePointer = cc_state.offset;
|
||||
ccp.ColorCalcStatePointerValid = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
|
||||
VkImageAspectFlags ds_aspects = 0;
|
||||
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
struct vk_depth_stencil_state opt_ds = dyn->ds;
|
||||
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
|
||||
ds.DoubleSidedStencilEnable = true;
|
||||
|
||||
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
|
||||
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
|
||||
|
||||
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
|
||||
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
|
||||
|
||||
ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
|
||||
ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
|
||||
|
||||
ds.DepthTestEnable = opt_ds.depth.test_enable;
|
||||
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
|
||||
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
|
||||
ds.StencilTestEnable = opt_ds.stencil.test_enable;
|
||||
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
|
||||
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
|
||||
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
|
||||
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
|
||||
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
|
||||
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
|
||||
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
|
||||
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
|
||||
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
|
||||
}
|
||||
|
||||
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
|
||||
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
|
||||
db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
|
||||
db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
|
||||
db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
|
||||
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
|
||||
ls.LineStippleInverseRepeatCount =
|
||||
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
|
||||
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
|
||||
#if GFX_VERx10 >= 125
|
||||
vf.GeometryDistributionEnable = true;
|
||||
#endif
|
||||
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
|
||||
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
|
||||
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
|
||||
uint32_t offset = cmd_buffer->state.gfx.index_offset;
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
|
||||
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
|
||||
ib.MOCS = anv_mocs(cmd_buffer->device,
|
||||
buffer->address.bo,
|
||||
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
|
||||
#if GFX_VER >= 12
|
||||
ib.L3BypassDisable = true;
|
||||
#endif
|
||||
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
|
||||
ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
|
||||
VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
|
||||
/* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */
|
||||
vfg.DistributionMode =
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
|
||||
RR_FREE;
|
||||
vfg.DistributionGranularity = BatchLevelGranularity;
|
||||
/* Wa_14014890652 */
|
||||
if (intel_device_info_is_dg2(cmd_buffer->device->info))
|
||||
vfg.GranularityThresholdDisable = 1;
|
||||
vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
|
||||
/* 192 vertices for TRILIST_ADJ */
|
||||
vfg.ListNBatchSizeScale = 0;
|
||||
/* Batch size of 384 vertices */
|
||||
vfg.List3BatchSizeScale = 2;
|
||||
/* Batch size of 128 vertices */
|
||||
vfg.List2BatchSizeScale = 1;
|
||||
/* Batch size of 128 vertices */
|
||||
vfg.List1BatchSizeScale = 2;
|
||||
/* Batch size of 256 vertices for STRIP topologies */
|
||||
vfg.StripBatchSizeScale = 3;
|
||||
/* 192 control points for PATCHLIST_3 */
|
||||
vfg.PatchBatchSizeScale = 1;
|
||||
/* 192 control points for PATCHLIST_3 */
|
||||
vfg.PatchBatchSizeMultiplier = 31;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
|
||||
genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
||||
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
|
||||
* threads.
|
||||
*/
|
||||
uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
|
||||
struct GENX(3DSTATE_WM) wm = {
|
||||
GENX(3DSTATE_WM_header),
|
||||
|
||||
.ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
|
||||
(pipeline->force_fragment_thread_dispatch ||
|
||||
anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
|
||||
ForceON : 0,
|
||||
};
|
||||
GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
|
||||
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
||||
const uint8_t color_writes = dyn->cb.color_write_enables;
|
||||
const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
|
||||
bool has_writeable_rt =
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
|
||||
(color_writes & ((1u << state->color_att_count) - 1)) != 0;
|
||||
|
||||
/* 3DSTATE_PS_BLEND to be consistent with the rest of the
|
||||
* BLEND_STATE_ENTRY.
|
||||
*/
|
||||
uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
|
||||
struct GENX(3DSTATE_PS_BLEND) ps_blend = {
|
||||
GENX(3DSTATE_PS_BLEND_header),
|
||||
.HasWriteableRT = has_writeable_rt,
|
||||
};
|
||||
GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
|
||||
pipeline->gfx8.ps_blend);
|
||||
|
||||
uint32_t blend_dws[GENX(BLEND_STATE_length) +
|
||||
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
|
||||
uint32_t *dws = blend_dws;
|
||||
memset(blend_dws, 0, sizeof(blend_dws));
|
||||
|
||||
/* Skip this part */
|
||||
dws += GENX(BLEND_STATE_length);
|
||||
|
||||
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
||||
/* Disable anything above the current number of color attachments. */
|
||||
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
|
||||
(color_writes & BITFIELD_BIT(i)) == 0;
|
||||
struct GENX(BLEND_STATE_ENTRY) entry = {
|
||||
.WriteDisableAlpha = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_A_BIT) == 0,
|
||||
.WriteDisableRed = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_R_BIT) == 0,
|
||||
.WriteDisableGreen = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_G_BIT) == 0,
|
||||
.WriteDisableBlue = write_disabled ||
|
||||
(pipeline->color_comp_writes[i] &
|
||||
VK_COLOR_COMPONENT_B_BIT) == 0,
|
||||
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
|
||||
};
|
||||
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
|
||||
dws += GENX(BLEND_STATE_ENTRY_length);
|
||||
}
|
||||
|
||||
uint32_t num_dwords = GENX(BLEND_STATE_length) +
|
||||
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
|
||||
|
||||
struct anv_state blend_states =
|
||||
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
|
||||
pipeline->gfx8.blend_state, num_dwords, 64);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
|
||||
bsp.BlendStatePointer = blend_states.offset;
|
||||
bsp.BlendStatePointerValid = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* When we're done, there is no more dirty gfx state. */
|
||||
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
|
||||
cmd_buffer->state.gfx.dirty = 0;
|
||||
}
|
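
The 3DSTATE_WM and 3DSTATE_PS_BLEND emission above follows anv's pack-and-merge pattern: the dynamic fields are packed into a zeroed dword array and then combined with the dwords the pipeline baked at creation time. A minimal sketch of the idea follows, assuming the two sides never pack the same bits; merge_dwords is a hypothetical helper for illustration, not part of this commit or of the driver.

#include <stdint.h>

/* Illustrative sketch only -- not the driver's implementation.  Because the
 * dynamically packed fields and the pipeline-baked fields occupy disjoint
 * bits, OR-ing the two packed arrays yields the final instruction dwords
 * that get written to the batch. */
static inline void
merge_dwords(uint32_t *out, const uint32_t *dynamic_dw,
             const uint32_t *pipeline_dw, unsigned len)
{
   for (unsigned i = 0; i < len; i++)
      out[i] = dynamic_dw[i] | pipeline_dw[i];
}
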
265
src/intel/vulkan_hasvk/meson.build
Normal file
@ -0,0 +1,265 @@
# Copyright © 2017-2019 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

anv_hasvk_entrypoints = custom_target(
'anv_hasvk_entrypoints',
input : [vk_entrypoints_gen, vk_api_xml],
output : ['anv_entrypoints.h', 'anv_entrypoints.c'],
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'anv',
'--device-prefix', 'gfx7', '--device-prefix', 'gfx75',
'--device-prefix', 'gfx8', '--device-prefix', 'gfx9',
'--device-prefix', 'gfx11', '--device-prefix', 'gfx12',
'--device-prefix', 'gfx125',
],
depend_files : vk_entrypoints_gen_depend_files,
)

intel_hasvk_icd = custom_target(
'intel_hasvk_icd',
input : [vk_icd_gen, vk_api_xml],
output : 'intel_hasvk_icd.@0@.json'.format(host_machine.cpu()),
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
'libvulkan_intel_hasvk.so'),
'--out', '@OUTPUT@',
],
build_by_default : true,
install_dir : with_vulkan_icd_dir,
install : true,
)

if meson.version().version_compare('>= 0.58')
_dev_icdname = 'intel_hasvk_devenv_icd.@0@.json'.format(host_machine.cpu())
custom_target(
'intel_hasvk_devenv_icd',
input : [vk_icd_gen, vk_api_xml],
output : _dev_icdname,
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', meson.current_build_dir() / 'libvulkan_intel_hasvk.so',
'--out', '@OUTPUT@',
],
build_by_default : true,
)

devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
endif

libanv_per_hw_ver_libs = []
anv_per_hw_ver_files = files(
'genX_blorp_exec.c',
'genX_cmd_buffer.c',
'genX_gpu_memcpy.c',
'genX_pipeline.c',
'genX_query.c',
'genX_state.c',
)
foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
['80', ['gfx8_cmd_buffer.c']], ['90', ['gfx8_cmd_buffer.c']],
['110', ['gfx8_cmd_buffer.c']], ['120', ['gfx8_cmd_buffer.c']],
['125', ['gfx8_cmd_buffer.c']]]
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
c_args : [
no_override_init_args, c_sse2_args,
'-DGFX_VERx10=@0@'.format(_gfx_ver),
],
gnu_symbol_visibility : 'hidden',
dependencies : [
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
],
)
endforeach

libanv_files = files(
'anv_acceleration_structure.c',
'anv_allocator.c',
'anv_android.h',
'anv_batch_chain.c',
'anv_blorp.c',
'anv_bo_sync.c',
'anv_cmd_buffer.c',
'anv_descriptor_set.c',
'anv_device.c',
'anv_formats.c',
'anv_genX.h',
'anv_image.c',
'anv_measure.c',
'anv_measure.h',
'anv_nir.h',
'anv_nir_add_base_work_group_id.c',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_multiview.c',
'anv_nir_lower_ubo_loads.c',
'anv_nir_lower_ycbcr_textures.c',
'anv_perf.c',
'anv_pipeline.c',
'anv_pipeline_cache.c',
'anv_private.h',
'anv_queue.c',
'anv_util.c',
'anv_utrace.c',
'anv_wsi.c',
)

anv_deps = [
dep_libdrm,
dep_valgrind,
idep_genxml,
idep_nir_headers,
idep_vulkan_util_headers,
idep_vulkan_runtime_headers,
idep_vulkan_wsi_headers,
]
anv_flags = [
no_override_init_args,
c_sse2_args,
]

anv_cpp_flags = []

if with_platform_x11
anv_deps += dep_xcb_dri3
endif

if with_platform_wayland
anv_deps += dep_wayland_client
endif

if with_xlib_lease
anv_deps += [dep_xlib_xrandr]
endif

if with_platform_android
libanv_files += files('anv_android.c')
else
libanv_files += files('anv_android_stubs.c')
endif

anv_deps += idep_intel_driver_ds_headers

libanv_hasvk_common = static_library(
'anv_hasvk_common',
[
libanv_files, anv_hasvk_entrypoints, sha1_h,
gen_xml_pack,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
inc_util,
],
c_args : anv_flags,
cpp_args : anv_cpp_flags,
gnu_symbol_visibility : 'hidden',
dependencies : anv_deps,
)

libvulkan_intel_hasvk = shared_library(
'vulkan_intel_hasvk',
[files('anv_gem.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : [libanv_hasvk_common, libanv_per_hw_ver_libs],
link_with : [
libintel_compiler, libintel_dev, libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
idep_intel_driver_ds,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
install : true,
)

if with_symbols_check
test(
'anv symbols check',
symbols_check,
args : [
'--lib', libvulkan_intel_hasvk,
'--symbols-file', vulkan_icd_symbols,
symbols_check_args,
],
suite : ['intel'],
)
endif

if with_tests
libvulkan_intel_hasvk_test = static_library(
'vulkan_intel_hasvk_test',
[files('anv_gem_stubs.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : libanv_hasvk_common,
link_with : [
libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libintel_dev,
libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
idep_mesautil,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
)

foreach t : ['block_pool_no_free', 'block_pool_grow_first',
'state_pool_no_free', 'state_pool_free_list_only',
'state_pool', 'state_pool_padding']
test(
'anv_hasvk_@0@'.format(t),
executable(
t,
['tests/@0@.c'.format(t), anv_hasvk_entrypoints[0]],
c_args : [ c_sse2_args ],
link_with : libvulkan_intel_hasvk_test,
dependencies : [
dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
idep_vulkan_runtime, idep_intel_driver_ds,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
),
suite : ['intel'],
)
endforeach
endif
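
For reference, one possible way to build and smoke-test the new driver from a Mesa tree is sketched below; the build directory, the ICD file name (it embeds the host CPU), and the test invocation are illustrative assumptions, not part of this commit:

meson setup build -Dvulkan-drivers=intel,intel_hasvk
ninja -C build
# point the Vulkan loader at the development ICD generated above
VK_ICD_FILENAMES=$PWD/build/src/intel/vulkan_hasvk/intel_hasvk_devenv_icd.x86_64.json vulkaninfo
# run the allocator tests registered at the end of this meson.build
meson test -C build --suite intel
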

67
src/intel/vulkan_hasvk/tests/block_pool_grow_first.c
Normal file
@ -0,0 +1,67 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "anv_private.h"
#include "test_common.h"

int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_block_pool pool;

/* Create a pool with initial size smaller than the block allocated, so
* that it must grow in the first allocation.
*/
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, initial_size);
ASSERT(pool.size == initial_size);

uint32_t padding;
int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding);

/* Pool will have grown at least space to fit the new allocation. */
ASSERT(pool.size > initial_size);
ASSERT(pool.size >= initial_size + block_size);

/* The whole initial size is considered padding and the allocation should be
* right next to it.
*/
ASSERT(padding == initial_size);
ASSERT(offset == initial_size);

/* Use the memory to ensure it is valid. */
void *map = anv_block_pool_map(&pool, offset, block_size);
memset(map, 22, block_size);

anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

153
src/intel/vulkan_hasvk/tests/block_pool_no_free.c
Normal file
@ -0,0 +1,153 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <pthread.h>

#include "anv_private.h"
#include "test_common.h"

#define NUM_THREADS 16
#define BLOCKS_PER_THREAD 1024
#define NUM_RUNS 64

struct job {
pthread_t thread;
unsigned id;
struct anv_block_pool *pool;
int32_t blocks[BLOCKS_PER_THREAD];
int32_t back_blocks[BLOCKS_PER_THREAD];
} jobs[NUM_THREADS];


static void *alloc_blocks(void *_job)
{
struct job *job = _job;
uint32_t job_id = job - jobs;
uint32_t block_size = 16 * ((job_id % 4) + 1);
int32_t block, *data;

for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = anv_block_pool_alloc(job->pool, block_size, NULL);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block >= 0);
job->blocks[i] = block;

block = anv_block_pool_alloc_back(job->pool, block_size);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block < 0);
job->back_blocks[i] = -block;
}

for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = job->blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);

block = -job->back_blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);
}

return NULL;
}

static void validate_monotonic(int32_t **blocks)
{
/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));

int highest = -1;
while (true) {
/* First, we find which thread has the lowest next element */
int32_t thread_min = INT32_MAX;
int min_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= BLOCKS_PER_THREAD)
continue;

if (thread_min > blocks[i][next[i]]) {
thread_min = blocks[i][next[i]];
min_thread_idx = i;
}
}

/* The only way this can happen is if all of the next[] values are at
* BLOCKS_PER_THREAD, in which case, we're done.
*/
if (thread_min == INT32_MAX)
break;

/* That next element had better be higher than the previous highest */
ASSERT(blocks[min_thread_idx][next[min_thread_idx]] > highest);

highest = blocks[min_thread_idx][next[min_thread_idx]];
next[min_thread_idx]++;
}
}

static void run_test()
{
struct anv_physical_device physical_device = {
.use_relocations = true,
};
struct anv_device device = {};
struct anv_block_pool pool;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, 4096);

for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]);
}

for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);

/* Validate that the block allocations were monotonic */
int32_t *block_ptrs[NUM_THREADS];
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].blocks;
validate_monotonic(block_ptrs);

/* Validate that the back block allocations were monotonic */
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].back_blocks;
validate_monotonic(block_ptrs);

anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

59
src/intel/vulkan_hasvk/tests/state_pool.c
Normal file
@ -0,0 +1,59 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <pthread.h>

#include "anv_private.h"
#include "test_common.h"

#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 10
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)
#define NUM_RUNS 64

#include "state_pool_test_helper.h"

int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);

for (unsigned i = 0; i < NUM_RUNS; i++) {
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 256);

/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);

run_state_pool_test(&state_pool);

anv_state_pool_finish(&state_pool);
}

anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

68
src/intel/vulkan_hasvk/tests/state_pool_free_list_only.c
Normal file
@ -0,0 +1,68 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <pthread.h>

#include "anv_private.h"
#include "test_common.h"

#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 12
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)

#include "state_pool_test_helper.h"

int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);

/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);

/* Grab and return enough states that the state pool test below won't
* actually ever resize anything.
*/
{
struct anv_state states[NUM_THREADS * STATES_PER_THREAD];
for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) {
states[i] = anv_state_pool_alloc(&state_pool, 16, 16);
ASSERT(states[i].offset != 0);
}

for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++)
anv_state_pool_free(&state_pool, states[i]);
}

run_state_pool_test(&state_pool);

anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

119
src/intel/vulkan_hasvk/tests/state_pool_no_free.c
Normal file
@ -0,0 +1,119 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <pthread.h>

#include "anv_private.h"
#include "test_common.h"

#define NUM_THREADS 16
#define STATES_PER_THREAD 1024
#define NUM_RUNS 64

struct job {
pthread_t thread;
unsigned id;
struct anv_state_pool *pool;
uint32_t offsets[STATES_PER_THREAD];
} jobs[NUM_THREADS];

pthread_barrier_t barrier;

static void *alloc_states(void *_job)
{
struct job *job = _job;

pthread_barrier_wait(&barrier);

for (unsigned i = 0; i < STATES_PER_THREAD; i++) {
struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16);
job->offsets[i] = state.offset;
}

return NULL;
}

static void run_test()
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 64);

pthread_barrier_init(&barrier, NULL, NUM_THREADS);

for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}

for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);

/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));

int highest = -1;
while (true) {
/* First, we find which thread has the highest next element */
int thread_max = -1;
int max_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= STATES_PER_THREAD)
continue;

if (thread_max < jobs[i].offsets[next[i]]) {
thread_max = jobs[i].offsets[next[i]];
max_thread_idx = i;
}
}

/* The only way this can happen is if all of the next[] values are at
* STATES_PER_THREAD, in which case, we're done.
*/
if (thread_max == -1)
break;

/* That next element had better be higher than the previous highest */
ASSERT(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest);

highest = jobs[max_thread_idx].offsets[next[max_thread_idx]];
next[max_thread_idx]++;
}

anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

79
src/intel/vulkan_hasvk/tests/state_pool_padding.c
Normal file
@ -0,0 +1,79 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "anv_private.h"
#include "test_common.h"

int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_state_pool state_pool;

anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);

/* Get the size of the underlying block_pool */
struct anv_block_pool *bp = &state_pool.block_pool;
uint64_t pool_size = bp->size;

/* Grab one so the pool has some initial usage */
anv_state_pool_alloc(&state_pool, 16, 16);

/* Grab a state that is the size of the initial allocation */
struct anv_state state = anv_state_pool_alloc(&state_pool, pool_size, 16);

/* The pool must have grown */
ASSERT(bp->size > pool_size);

/* And the state must have been allocated at the end of the original size */
ASSERT(state.offset == pool_size);

/* A new allocation that fits into the returned empty space should have an
* offset within the original pool size
*/
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);

/* We should be able to allocate pool->block_size'd chunks in the returned area
*/
int left_chunks = pool_size / 4096 - 2;
for (int i = 0; i < left_chunks; i++) {
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);
}

/* Now the next chunk to be allocated should make the pool grow again */
pool_size = bp->size;
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(bp->size > pool_size);
ASSERT(state.offset == pool_size);

anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

71
src/intel/vulkan_hasvk/tests/state_pool_test_helper.h
Normal file
@ -0,0 +1,71 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <pthread.h>

struct job {
struct anv_state_pool *pool;
unsigned id;
pthread_t thread;
} jobs[NUM_THREADS];

pthread_barrier_t barrier;

static void *alloc_states(void *void_job)
{
struct job *job = void_job;

const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2);
const unsigned num_chunks = STATES_PER_THREAD / chunk_size;

struct anv_state states[chunk_size];

pthread_barrier_wait(&barrier);

for (unsigned c = 0; c < num_chunks; c++) {
for (unsigned i = 0; i < chunk_size; i++) {
states[i] = anv_state_pool_alloc(job->pool, 16, 16);
memset(states[i].map, 139, 16);
ASSERT(states[i].offset != 0);
}

for (unsigned i = 0; i < chunk_size; i++)
anv_state_pool_free(job->pool, states[i]);
}

return NULL;
}

static void run_state_pool_test(struct anv_state_pool *state_pool)
{
pthread_barrier_init(&barrier, NULL, NUM_THREADS);

for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}

for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
}

34
src/intel/vulkan_hasvk/tests/test_common.h
Normal file
@ -0,0 +1,34 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <stdio.h>
#include <stdlib.h>

#define ASSERT(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "%s:%d: Test assertion `%s` failed.\n", \
__FILE__, __LINE__, # cond); \
abort(); \
} \
} while (false)