intel: add a hasvk vulkan driver

This new driver is a copy of the current Anv code, but it will only
load on gfx7/8 platforms.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Lionel Landwerlin 2022-08-03 11:43:36 +03:00 committed by Marge Bot
parent 0013ef89bf
commit 50013ca9a5
54 changed files with 49575 additions and 4 deletions


@@ -250,7 +250,7 @@ _vulkan_drivers = get_option('vulkan-drivers')
 if _vulkan_drivers.contains('auto')
   if system_has_kms_drm
     if host_machine.cpu_family().startswith('x86')
-      _vulkan_drivers = ['amd', 'intel', 'swrast']
+      _vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast']
     elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
       _vulkan_drivers = ['swrast']
     elif ['mips', 'mips64', 'riscv32', 'riscv64'].contains(host_machine.cpu_family())
@@ -269,6 +269,7 @@ if _vulkan_drivers.contains('auto')
 endif
 with_intel_vk = _vulkan_drivers.contains('intel')
+with_intel_hasvk = _vulkan_drivers.contains('intel_hasvk')
 with_amd_vk = _vulkan_drivers.contains('amd')
 with_freedreno_vk = _vulkan_drivers.contains('freedreno')
 with_panfrost_vk = _vulkan_drivers.contains('panfrost')
@@ -283,7 +284,7 @@ with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
 with_any_vk = _vulkan_drivers.length() != 0
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus or with_intel_tools
+with_any_intel = with_intel_vk or with_intel_hasvk or with_gallium_iris or with_gallium_crocus or with_intel_tools
 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1549,7 +1550,7 @@ endif
 if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
-elif with_intel_vk
+elif with_intel_vk or with_intel_hasvk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
 endif


@@ -198,7 +198,7 @@ option(
   'vulkan-drivers',
   type : 'array',
   value : ['auto'],
-  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
+  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'intel_hasvk', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
   description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(


@@ -38,3 +38,6 @@ endif
 if with_intel_vk
   subdir('vulkan')
 endif
+if with_intel_hasvk
+  subdir('vulkan_hasvk')
+endif


@@ -0,0 +1,13 @@
Intel Vulkan ToDo
=================

Missing Features:
- Investigate CTS failures on HSW
- Sparse memory

Performance:
- Multi-{sampled/gfx8,LOD} HiZ
- MSAA fast clears
- Pushing pieces of UBOs?
- Enable guardband clipping
- Use soft-pin to avoid relocations


@@ -0,0 +1,251 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
void
anv_GetAccelerationStructureBuildSizesKHR(
VkDevice device,
VkAccelerationStructureBuildTypeKHR buildType,
const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
const uint32_t* pMaxPrimitiveCounts,
VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
assert(pSizeInfo->sType ==
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);
pSizeInfo->accelerationStructureSize = 0; /* TODO */
uint64_t cpu_build_scratch_size = 0; /* TODO */
uint64_t cpu_update_scratch_size = cpu_build_scratch_size;
uint64_t gpu_build_scratch_size = 0; /* TODO */
uint64_t gpu_update_scratch_size = gpu_build_scratch_size;
switch (buildType) {
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR:
pSizeInfo->buildScratchSize = cpu_build_scratch_size;
pSizeInfo->updateScratchSize = cpu_update_scratch_size;
break;
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR:
pSizeInfo->buildScratchSize = gpu_build_scratch_size;
pSizeInfo->updateScratchSize = gpu_update_scratch_size;
break;
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_OR_DEVICE_KHR:
pSizeInfo->buildScratchSize = MAX2(cpu_build_scratch_size,
gpu_build_scratch_size);
pSizeInfo->updateScratchSize = MAX2(cpu_update_scratch_size,
gpu_update_scratch_size);
break;
default:
unreachable("Invalid acceleration structure build type");
}
}
VkResult
anv_CreateAccelerationStructureKHR(
VkDevice _device,
const VkAccelerationStructureCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkAccelerationStructureKHR* pAccelerationStructure)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
struct anv_acceleration_structure *accel;
accel = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (accel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &accel->base,
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);
accel->size = pCreateInfo->size;
accel->address = anv_address_add(buffer->address, pCreateInfo->offset);
*pAccelerationStructure = anv_acceleration_structure_to_handle(accel);
return VK_SUCCESS;
}
void
anv_DestroyAccelerationStructureKHR(
VkDevice _device,
VkAccelerationStructureKHR accelerationStructure,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_acceleration_structure, accel, accelerationStructure);
if (!accel)
return;
vk_object_base_finish(&accel->base);
vk_free2(&device->vk.alloc, pAllocator, accel);
}
VkDeviceAddress
anv_GetAccelerationStructureDeviceAddressKHR(
VkDevice device,
const VkAccelerationStructureDeviceAddressInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_acceleration_structure, accel,
pInfo->accelerationStructure);
assert(!anv_address_is_null(accel->address));
assert(anv_bo_is_pinned(accel->address.bo));
return anv_address_physical(accel->address);
}
void
anv_GetDeviceAccelerationStructureCompatibilityKHR(
VkDevice device,
const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
unreachable("Unimplemented");
}
VkResult
anv_BuildAccelerationStructuresKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyAccelerationStructureKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyAccelerationStructureToMemoryKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyMemoryToAccelerationStructureKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_WriteAccelerationStructuresPropertiesKHR(
VkDevice _device,
uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR* pAccelerationStructures,
VkQueryType queryType,
size_t dataSize,
void* pData,
size_t stride)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
void
anv_CmdBuildAccelerationStructuresKHR(
VkCommandBuffer commandBuffer,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
unreachable("Unimplemented");
}
void
anv_CmdBuildAccelerationStructuresIndirectKHR(
VkCommandBuffer commandBuffer,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkDeviceAddress* pIndirectDeviceAddresses,
const uint32_t* pIndirectStrides,
const uint32_t* const* ppMaxPrimitiveCounts)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyAccelerationStructureKHR(
VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyAccelerationStructureToMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyMemoryToAccelerationStructureKHR(
VkCommandBuffer commandBuffer,
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdWriteAccelerationStructuresPropertiesKHR(
VkCommandBuffer commandBuffer,
uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR* pAccelerationStructures,
VkQueryType queryType,
VkQueryPool queryPool,
uint32_t firstQuery)
{
unreachable("Unimplemented");
}

File diff suppressed because it is too large.


@@ -0,0 +1,792 @@
/*
* Copyright © 2017, Google Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <hardware/gralloc.h>
#if ANDROID_API_LEVEL >= 26
#include <hardware/gralloc1.h>
#endif
#include <hardware/hardware.h>
#include <hardware/hwvulkan.h>
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <sync/sync.h>
#include "anv_private.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
static int anv_hal_close(struct hw_device_t *dev);
static void UNUSED
static_asserts(void)
{
STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
}
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
.common = {
.tag = HARDWARE_MODULE_TAG,
.module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
.hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
.id = HWVULKAN_HARDWARE_MODULE_ID,
.name = "Intel Vulkan HAL",
.author = "Intel",
.methods = &(hw_module_methods_t) {
.open = anv_hal_open,
},
},
};
/* If any bits in test_mask are set, then unset them and return true. */
static inline bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
uint32_t orig_mask = *inout_mask;
*inout_mask &= ~test_mask;
return *inout_mask != orig_mask;
}
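/* Illustrative sketch, not part of the original change: how unmask32() peels
 * the tested bits off a usage mask, mirroring the way setup_gralloc0_usage()
 * further down consumes VkImageUsageFlags one group at a time. The example
 * function name and the chosen usage bits are hypothetical.
 */
static uint32_t UNUSED
unmask32_example(void)
{
   uint32_t usage = VK_IMAGE_USAGE_SAMPLED_BIT |
                    VK_IMAGE_USAGE_TRANSFER_DST_BIT;

   /* SAMPLED is set: it gets cleared from 'usage' and true is returned. */
   ASSERTED bool had_sampled = unmask32(&usage, VK_IMAGE_USAGE_SAMPLED_BIT);
   assert(had_sampled);

   /* STORAGE was never set: 'usage' is left untouched and false is returned. */
   ASSERTED bool had_storage = unmask32(&usage, VK_IMAGE_USAGE_STORAGE_BIT);
   assert(!had_storage);

   return usage; /* == VK_IMAGE_USAGE_TRANSFER_DST_BIT */
}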
static int
anv_hal_open(const struct hw_module_t* mod, const char* id,
struct hw_device_t** dev)
{
assert(mod == &HAL_MODULE_INFO_SYM.common);
assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
if (!hal_dev)
return -1;
*hal_dev = (hwvulkan_device_t) {
.common = {
.tag = HARDWARE_DEVICE_TAG,
.version = HWVULKAN_DEVICE_API_VERSION_0_1,
.module = &HAL_MODULE_INFO_SYM.common,
.close = anv_hal_close,
},
.EnumerateInstanceExtensionProperties = anv_EnumerateInstanceExtensionProperties,
.CreateInstance = anv_CreateInstance,
.GetInstanceProcAddr = anv_GetInstanceProcAddr,
};
*dev = &hal_dev->common;
return 0;
}
static int
anv_hal_close(struct hw_device_t *dev)
{
/* hwvulkan.h claims that hw_device_t::close() is never called. */
return -1;
}
#if ANDROID_API_LEVEL >= 26
#include <vndk/hardware_buffer.h>
/* See i915_private_android_types.h in minigbm. */
#define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100
enum {
/* Usage bit equal to GRALLOC_USAGE_HW_CAMERA_MASK */
BUFFER_USAGE_CAMERA_MASK = 0x00060000U,
};
inline VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage)
{
switch (android_format) {
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
return VK_FORMAT_R8G8B8A8_UNORM;
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
return VK_FORMAT_R8G8B8_UNORM;
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
return VK_FORMAT_R16G16B16A16_SFLOAT;
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL:
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
if (android_usage & BUFFER_USAGE_CAMERA_MASK)
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
else
return VK_FORMAT_R8G8B8_UNORM;
case AHARDWAREBUFFER_FORMAT_BLOB:
default:
return VK_FORMAT_UNDEFINED;
}
}
static inline unsigned
android_format_from_vk(unsigned vk_format)
{
switch (vk_format) {
case VK_FORMAT_R8G8B8A8_UNORM:
return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_R8G8B8_UNORM:
return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM;
case VK_FORMAT_R16G16B16A16_SFLOAT:
return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT;
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
#ifdef HAVE_CROS_GRALLOC
return AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420;
#else
return HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL;
#endif
default:
return AHARDWAREBUFFER_FORMAT_BLOB;
}
}
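/* Illustrative sketch, not part of the original change: formats with an exact
 * AHardwareBuffer equivalent are meant to round-trip through the two tables
 * above, while everything else falls back to BLOB / UNDEFINED. The example
 * function name is hypothetical.
 */
static void UNUSED
format_mapping_example(void)
{
   ASSERTED VkFormat vk =
      vk_format_from_android(AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM,
                             0 /* usage */);
   assert(vk == VK_FORMAT_R8G8B8A8_UNORM);
   assert(android_format_from_vk(vk) == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM);

   /* No AHardwareBuffer equivalent: falls through to the default cases. */
   assert(android_format_from_vk(VK_FORMAT_R64_SFLOAT) ==
          AHARDWAREBUFFER_FORMAT_BLOB);
}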
static VkFormatFeatureFlags
features2_to_features(VkFormatFeatureFlags2 features2)
{
return features2 & VK_ALL_FORMAT_FEATURE_FLAG_BITS;
}
static VkResult
get_ahw_buffer_format_properties2(
VkDevice device_h,
const struct AHardwareBuffer *buffer,
VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
/* Get a description of the buffer contents. */
AHardwareBuffer_Desc desc;
AHardwareBuffer_describe(buffer, &desc);
/* Verify description. */
uint64_t gpu_usage =
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
/* "Buffer must be a valid Android hardware buffer object with at least
* one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
*/
if (!(desc.usage & (gpu_usage)))
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
/* Fill properties fields based on description. */
VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties;
p->format = vk_format_from_android(desc.format, desc.usage);
const struct anv_format *anv_format = anv_get_format(p->format);
p->externalFormat = (uint64_t) (uintptr_t) anv_format;
/* Default to OPTIMAL tiling but set to linear in case
* of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage.
*/
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL;
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
tiling = VK_IMAGE_TILING_LINEAR;
p->formatFeatures =
anv_get_image_format_features2(device->info, p->format, anv_format,
tiling, NULL);
/* "Images can be created with an external format even if the Android hardware
* buffer has a format which has an equivalent Vulkan format to enable
* consistent handling of images from sources that might use either category
* of format. However, all images created with an external format are subject
* to the valid usage requirements associated with external formats, even if
* the Android hardware buffers format has a Vulkan equivalent."
*
* "The formatFeatures member *must* include
* VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
* VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
* VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
*/
p->formatFeatures |=
VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
/* "Implementations may not always be able to determine the color model,
* numerical range, or chroma offsets of the image contents, so the values
* in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
* Applications should treat these values as sensible defaults to use in
* the absence of more reliable information obtained through some other
* means."
*/
p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
return VK_SUCCESS;
}
VkResult
anv_GetAndroidHardwareBufferPropertiesANDROID(
VkDevice device_h,
const struct AHardwareBuffer *buffer,
VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
ANV_FROM_HANDLE(anv_device, dev, device_h);
VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
vk_find_struct(pProperties->pNext,
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
/* Fill format properties of an Android hardware buffer. */
if (format_prop) {
VkAndroidHardwareBufferFormatProperties2ANDROID format_prop2 = {
.sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID,
};
get_ahw_buffer_format_properties2(device_h, buffer, &format_prop2);
format_prop->format = format_prop2.format;
format_prop->externalFormat = format_prop2.externalFormat;
format_prop->formatFeatures =
features2_to_features(format_prop2.formatFeatures);
format_prop->samplerYcbcrConversionComponents =
format_prop2.samplerYcbcrConversionComponents;
format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel;
format_prop->suggestedYcbcrRange = format_prop2.suggestedYcbcrRange;
format_prop->suggestedXChromaOffset = format_prop2.suggestedXChromaOffset;
format_prop->suggestedYChromaOffset = format_prop2.suggestedYChromaOffset;
}
VkAndroidHardwareBufferFormatProperties2ANDROID *format_prop2 =
vk_find_struct(pProperties->pNext,
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID);
if (format_prop2)
get_ahw_buffer_format_properties2(device_h, buffer, format_prop2);
/* NOTE - We support buffers with only one handle but do not error on
* multiple handle case. Reason is that we want to support YUV formats
* where we have many logical planes but they all point to the same
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
*/
const native_handle_t *handle =
AHardwareBuffer_getNativeHandle(buffer);
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
if (dma_buf < 0)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
/* All memory types. */
uint32_t memory_types = (1ull << dev->physical->memory.type_count) - 1;
pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
pProperties->memoryTypeBits = memory_types;
return VK_SUCCESS;
}
VkResult
anv_GetMemoryAndroidHardwareBufferANDROID(
VkDevice device_h,
const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
struct AHardwareBuffer **pBuffer)
{
ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory);
/* Some quotes from Vulkan spec:
*
* "If the device memory was created by importing an Android hardware
* buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
* Android hardware buffer object."
*
* "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
* have been included in VkExportMemoryAllocateInfo::handleTypes when
* memory was created."
*/
if (mem->ahw) {
*pBuffer = mem->ahw;
/* Increase refcount. */
AHardwareBuffer_acquire(mem->ahw);
return VK_SUCCESS;
}
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
#endif
/* Construct ahw usage mask from image usage bits, see
* 'AHardwareBuffer Usage Equivalence' in Vulkan spec.
*/
uint64_t
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage)
{
uint64_t ahw_usage = 0;
#if ANDROID_API_LEVEL >= 26
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT;
if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP;
if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT;
/* No usage bits set - set at least one GPU usage. */
if (ahw_usage == 0)
ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
#endif
return ahw_usage;
}
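/* Illustrative sketch, not part of the original change: per the mapping above,
 * a sampled color attachment requests both GPU_SAMPLED_IMAGE and
 * GPU_COLOR_OUTPUT, and an empty usage still gets at least one GPU bit. The
 * example function name is hypothetical.
 */
static void UNUSED
ahw_usage_example(void)
{
#if ANDROID_API_LEVEL >= 26
   ASSERTED uint64_t ahw =
      anv_ahw_usage_from_vk_usage(0 /* create flags */,
                                  VK_IMAGE_USAGE_SAMPLED_BIT |
                                  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
   assert(ahw == (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
                  AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT));

   assert(anv_ahw_usage_from_vk_usage(0, 0) ==
          AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE);
#endif
}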
/*
* Called from anv_AllocateMemory when importing an AHardwareBuffer.
*/
VkResult
anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info)
{
#if ANDROID_API_LEVEL >= 26
ANV_FROM_HANDLE(anv_device, device, device_h);
/* Import from AHardwareBuffer to anv_device_memory. */
const native_handle_t *handle =
AHardwareBuffer_getNativeHandle(info->buffer);
/* NOTE - We support buffers with only one handle but do not error on
* multiple handle case. Reason is that we want to support YUV formats
* where we have many logical planes but they all point to the same
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
*/
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
if (dma_buf < 0)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
VkResult result = anv_device_import_bo(device, dma_buf, 0,
0 /* client_address */,
&mem->bo);
assert(result == VK_SUCCESS);
/* "If the vkAllocateMemory command succeeds, the implementation must
* acquire a reference to the imported hardware buffer, which it must
* release when the device memory object is freed. If the command fails,
* the implementation must not retain a reference."
*/
AHardwareBuffer_acquire(info->buffer);
mem->ahw = info->buffer;
return VK_SUCCESS;
#else
return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo)
{
#if ANDROID_API_LEVEL >= 26
const VkMemoryDedicatedAllocateInfo *dedicated_info =
vk_find_struct_const(pAllocateInfo->pNext,
MEMORY_DEDICATED_ALLOCATE_INFO);
uint32_t w = 0;
uint32_t h = 1;
uint32_t layers = 1;
uint32_t format = 0;
uint64_t usage = 0;
/* If caller passed dedicated information. */
if (dedicated_info && dedicated_info->image) {
ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
w = image->vk.extent.width;
h = image->vk.extent.height;
layers = image->vk.array_layers;
format = android_format_from_vk(image->vk.format);
usage = anv_ahw_usage_from_vk_usage(image->vk.create_flags, image->vk.usage);
} else if (dedicated_info && dedicated_info->buffer) {
ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer);
w = buffer->vk.size;
format = AHARDWAREBUFFER_FORMAT_BLOB;
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
} else {
w = pAllocateInfo->allocationSize;
format = AHARDWAREBUFFER_FORMAT_BLOB;
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
}
struct AHardwareBuffer *ahw = NULL;
struct AHardwareBuffer_Desc desc = {
.width = w,
.height = h,
.layers = layers,
.format = format,
.usage = usage,
};
if (AHardwareBuffer_allocate(&desc, &ahw) != 0)
return VK_ERROR_OUT_OF_HOST_MEMORY;
const VkImportAndroidHardwareBufferInfoANDROID import_info = {
.buffer = ahw,
};
VkResult result = anv_import_ahw_memory(device_h, mem, &import_info);
/* Release a reference to avoid leak for AHB allocation. */
AHardwareBuffer_release(ahw);
return result;
#else
return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info)
{
struct anv_bo *bo = NULL;
VkResult result;
struct anv_image_create_info anv_info = {
.vk_info = base_info,
.isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT,
};
if (gralloc_info->handle->numFds != 1) {
return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"VkNativeBufferANDROID::handle::numFds is %d, "
"expected 1", gralloc_info->handle->numFds);
}
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
* must exceed that of the gralloc handle, and we do not own the gralloc
* handle.
*/
int dma_buf = gralloc_info->handle->data[0];
/* We need to set the WRITE flag on window system buffers so that GEM will
* know we're writing to them and synchronize uses on other rings (for
* example, if the display server uses the blitter ring).
*
* If this function fails and if the imported bo was resident in the cache,
* we should avoid updating the bo's flags. Therefore, we defer updating
* the flags until success is certain.
*
*/
result = anv_device_import_bo(device, dma_buf,
ANV_BO_ALLOC_IMPLICIT_SYNC |
ANV_BO_ALLOC_IMPLICIT_WRITE,
0 /* client_address */,
&bo);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to import dma-buf from VkNativeBufferANDROID");
}
enum isl_tiling tiling;
result = anv_device_get_bo_tiling(device, bo, &tiling);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to get tiling from VkNativeBufferANDROID");
}
anv_info.isl_tiling_flags = 1u << tiling;
enum isl_format format = anv_get_isl_format(device->info,
base_info->format,
VK_IMAGE_ASPECT_COLOR_BIT,
base_info->tiling);
assert(format != ISL_FORMAT_UNSUPPORTED);
result = anv_image_init(device, image, &anv_info);
if (result != VK_SUCCESS)
goto fail_init;
VkMemoryRequirements2 mem_reqs = {
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
};
anv_image_get_memory_requirements(device, image, image->vk.aspects,
&mem_reqs);
VkDeviceSize aligned_image_size =
align_u64(mem_reqs.memoryRequirements.size,
mem_reqs.memoryRequirements.alignment);
if (bo->size < aligned_image_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"dma-buf from VkNativeBufferANDROID is too small for "
"VkImage: %"PRIu64"B < %"PRIu64"B",
bo->size, aligned_image_size);
goto fail_size;
}
assert(!image->disjoint);
assert(image->n_planes == 1);
assert(image->planes[0].primary_surface.memory_range.binding ==
ANV_IMAGE_MEMORY_BINDING_MAIN);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
image->from_gralloc = true;
return VK_SUCCESS;
fail_size:
anv_image_finish(image);
fail_init:
anv_device_release_bo(device, bo);
return result;
}
VkResult
anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info)
{
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
* must exceed that of the gralloc handle, and we do not own the gralloc
* handle.
*/
int dma_buf = gralloc_info->handle->data[0];
/* We need to set the WRITE flag on window system buffers so that GEM will
* know we're writing to them and synchronize uses on other rings (for
* example, if the display server uses the blitter ring).
*
* If this function fails and if the imported bo was resident in the cache,
* we should avoid updating the bo's flags. Therefore, we defer updating
* the flags until success is certain.
*
*/
struct anv_bo *bo = NULL;
VkResult result = anv_device_import_bo(device, dma_buf,
ANV_BO_ALLOC_IMPLICIT_SYNC |
ANV_BO_ALLOC_IMPLICIT_WRITE,
0 /* client_address */,
&bo);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to import dma-buf from VkNativeBufferANDROID");
}
uint64_t img_size = image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].memory_range.size;
if (bo->size < img_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"dma-buf from VkNativeBufferANDROID is too small for "
"VkImage: %"PRIu64"B < %"PRIu64"B",
bo->size, img_size);
anv_device_release_bo(device, bo);
return result;
}
assert(!image->disjoint);
assert(image->n_planes == 1);
assert(image->planes[0].primary_surface.memory_range.binding ==
ANV_IMAGE_MEMORY_BINDING_MAIN);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
image->from_gralloc = true;
return VK_SUCCESS;
}
static VkResult
format_supported_with_usage(VkDevice device_h, VkFormat format,
VkImageUsageFlags imageUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkPhysicalDevice phys_dev_h = anv_physical_device_to_handle(device->physical);
VkResult result;
const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
.format = format,
.type = VK_IMAGE_TYPE_2D,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = imageUsage,
};
VkImageFormatProperties2 image_format_props = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
};
/* Check that requested format and usage are supported. */
result = anv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
&image_format_info, &image_format_props);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"anv_GetPhysicalDeviceImageFormatProperties2 failed "
"inside %s", __func__);
}
return VK_SUCCESS;
}
static VkResult
setup_gralloc0_usage(struct anv_device *device, VkFormat format,
VkImageUsageFlags imageUsage, int *grallocUsage)
{
/* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
* returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
* The relevant code in libvulkan/swapchain.cpp contains this fun comment:
*
* TODO(jessehall): I think these are right, but haven't thought hard
* about it. Do we need to query the driver for support of any of
* these?
*
* Any disagreement between this function and the hardcoded
* VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
* dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
*/
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
/* All VkImageUsageFlags not explicitly checked here are unsupported for
* gralloc swapchains.
*/
if (imageUsage != 0) {
return vk_errorf(device, VK_ERROR_FORMAT_NOT_SUPPORTED,
"unsupported VkImageUsageFlags(0x%x) for gralloc "
"swapchain", imageUsage);
}
/* The below formats support GRALLOC_USAGE_HW_FB (that is, display
* scanout). This short list of formats is universally supported on Intel
* but is incomplete. The full set of supported formats is dependent on
* kernel and hardware.
*
* FINISHME: Advertise all display-supported formats.
*/
switch (format) {
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_R8G8B8A8_SRGB:
*grallocUsage |= GRALLOC_USAGE_HW_FB |
GRALLOC_USAGE_HW_COMPOSER |
GRALLOC_USAGE_EXTERNAL_DISP;
break;
default:
mesa_logw("%s: unsupported format=%d", __func__, format);
}
if (*grallocUsage == 0)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
return VK_SUCCESS;
}
#if ANDROID_API_LEVEL >= 26
VkResult anv_GetSwapchainGrallocUsage2ANDROID(
VkDevice device_h,
VkFormat format,
VkImageUsageFlags imageUsage,
VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
uint64_t* grallocConsumerUsage,
uint64_t* grallocProducerUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkResult result;
*grallocConsumerUsage = 0;
*grallocProducerUsage = 0;
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
result = format_supported_with_usage(device_h, format, imageUsage);
if (result != VK_SUCCESS)
return result;
int32_t grallocUsage = 0;
result = setup_gralloc0_usage(device, format, imageUsage, &grallocUsage);
if (result != VK_SUCCESS)
return result;
/* Setup gralloc1 usage flags from gralloc0 flags. */
if (grallocUsage & GRALLOC_USAGE_HW_RENDER) {
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
}
if (grallocUsage & GRALLOC_USAGE_HW_TEXTURE) {
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
}
if (grallocUsage & (GRALLOC_USAGE_HW_FB |
GRALLOC_USAGE_HW_COMPOSER |
GRALLOC_USAGE_EXTERNAL_DISP)) {
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
}
return VK_SUCCESS;
}
#endif
VkResult anv_GetSwapchainGrallocUsageANDROID(
VkDevice device_h,
VkFormat format,
VkImageUsageFlags imageUsage,
int* grallocUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkResult result;
*grallocUsage = 0;
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
result = format_supported_with_usage(device_h, format, imageUsage);
if (result != VK_SUCCESS)
return result;
return setup_gralloc0_usage(device, format, imageUsage, grallocUsage);
}


@@ -0,0 +1,57 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef ANV_ANDROID_H
#define ANV_ANDROID_H
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
#include <vndk/hardware_buffer.h>
#endif
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>
#include <vulkan/vk_android_native_buffer.h>
struct anv_device_memory;
struct anv_device;
struct anv_image;
VkResult anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info);
VkResult anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info);
uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage);
VkResult anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info);
VkResult anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo);
#endif /* ANV_ANDROID_H */


@@ -0,0 +1,63 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_android.h"
VkResult
anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
VkResult anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
uint64_t
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage)
{
return 0;
}
VkResult
anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
VkResult
anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,237 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "util/os_time.h"
static struct anv_bo_sync *
to_anv_bo_sync(struct vk_sync *sync)
{
assert(sync->type == &anv_bo_sync_type);
return container_of(sync, struct anv_bo_sync, sync);
}
static VkResult
anv_bo_sync_init(struct vk_device *vk_device,
struct vk_sync *vk_sync,
uint64_t initial_value)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
sync->state = initial_value ? ANV_BO_SYNC_STATE_SIGNALED :
ANV_BO_SYNC_STATE_RESET;
return anv_device_alloc_bo(device, "bo-sync", 4096,
ANV_BO_ALLOC_EXTERNAL |
ANV_BO_ALLOC_IMPLICIT_SYNC,
0 /* explicit_address */,
&sync->bo);
}
static void
anv_bo_sync_finish(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
anv_device_release_bo(device, sync->bo);
}
static VkResult
anv_bo_sync_reset(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
sync->state = ANV_BO_SYNC_STATE_RESET;
return VK_SUCCESS;
}
static int64_t
anv_get_relative_timeout(uint64_t abs_timeout)
{
uint64_t now = os_time_get_nano();
/* We don't want negative timeouts.
*
* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is
* supposed to block indefinitely for timeouts < 0. Unfortunately,
* this was broken for a couple of kernel releases. Since there's
* no way to know whether or not the kernel we're using is one of
* the broken ones, the best we can do is to clamp the timeout to
* INT64_MAX. This limits the maximum timeout from 584 years to
* 292 years - likely not a big deal.
*/
if (abs_timeout < now)
return 0;
uint64_t rel_timeout = abs_timeout - now;
if (rel_timeout > (uint64_t) INT64_MAX)
rel_timeout = INT64_MAX;
return rel_timeout;
}
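/* Illustrative arithmetic for the clamp above, not part of the original
 * change:
 *
 *   INT64_MAX ns  = 9,223,372,036,854,775,807 ns ~ 9.2e9 s  ~ 292.5 years
 *   UINT64_MAX ns = 18,446,744,073,709,551,615 ns ~ 1.8e10 s ~ 584.9 years
 *
 * so clamping the unsigned relative timeout to INT64_MAX halves the maximum
 * representable wait from roughly 584 to 292 years, which is still far longer
 * than any timeout an application can reasonably request.
 */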
static VkResult
anv_bo_sync_wait(struct vk_device *vk_device,
uint32_t wait_count,
const struct vk_sync_wait *waits,
enum vk_sync_wait_flags wait_flags,
uint64_t abs_timeout_ns)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
VkResult result;
uint32_t pending = wait_count;
while (pending) {
pending = 0;
bool signaled = false;
for (uint32_t i = 0; i < wait_count; i++) {
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
switch (sync->state) {
case ANV_BO_SYNC_STATE_RESET:
/* This fence hasn't been submitted yet, we'll catch it the next
* time around. Yes, this may mean we dead-loop but, short of
* lots of locking and a condition variable, there's not much that
* we can do about that.
*/
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
pending++;
continue;
case ANV_BO_SYNC_STATE_SIGNALED:
/* This fence is not pending. If waitAll isn't set, we can return
* early. Otherwise, we have to keep going.
*/
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
continue;
case ANV_BO_SYNC_STATE_SUBMITTED:
/* These are the fences we really care about. Go ahead and wait
* on it until we hit a timeout.
*/
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
uint64_t rel_timeout = anv_get_relative_timeout(abs_timeout_ns);
result = anv_device_wait(device, sync->bo, rel_timeout);
/* This also covers VK_TIMEOUT */
if (result != VK_SUCCESS)
return result;
sync->state = ANV_BO_SYNC_STATE_SIGNALED;
signaled = true;
}
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
break;
default:
unreachable("Invalid BO sync state");
}
}
if (pending && !signaled) {
/* If we've hit this then someone decided to vkWaitForFences before
* they've actually submitted any of them to a queue. This is a
* fairly pessimal case, so it's ok to lock here and use a standard
* pthreads condition variable.
*/
pthread_mutex_lock(&device->mutex);
/* It's possible that some of the fences have changed state since the
* last time we checked. Now that we have the lock, check for
* pending fences again and don't wait if it's changed.
*/
uint32_t now_pending = 0;
for (uint32_t i = 0; i < wait_count; i++) {
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
if (sync->state == ANV_BO_SYNC_STATE_RESET)
now_pending++;
}
assert(now_pending <= pending);
if (now_pending == pending) {
struct timespec abstime = {
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
};
ASSERTED int ret;
ret = pthread_cond_timedwait(&device->queue_submit,
&device->mutex, &abstime);
assert(ret != EINVAL);
if (os_time_get_nano() >= abs_timeout_ns) {
pthread_mutex_unlock(&device->mutex);
return VK_TIMEOUT;
}
}
pthread_mutex_unlock(&device->mutex);
}
}
return VK_SUCCESS;
}
const struct vk_sync_type anv_bo_sync_type = {
.size = sizeof(struct anv_bo_sync),
.features = VK_SYNC_FEATURE_BINARY |
VK_SYNC_FEATURE_GPU_WAIT |
VK_SYNC_FEATURE_GPU_MULTI_WAIT |
VK_SYNC_FEATURE_CPU_WAIT |
VK_SYNC_FEATURE_CPU_RESET |
VK_SYNC_FEATURE_WAIT_ANY |
VK_SYNC_FEATURE_WAIT_PENDING,
.init = anv_bo_sync_init,
.finish = anv_bo_sync_finish,
.reset = anv_bo_sync_reset,
.wait_many = anv_bo_sync_wait,
};
VkResult
anv_create_sync_for_memory(struct vk_device *device,
VkDeviceMemory memory,
bool signal_memory,
struct vk_sync **sync_out)
{
ANV_FROM_HANDLE(anv_device_memory, mem, memory);
struct anv_bo_sync *bo_sync;
bo_sync = vk_zalloc(&device->alloc, sizeof(*bo_sync), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (bo_sync == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
bo_sync->sync.type = &anv_bo_sync_type;
bo_sync->state = signal_memory ? ANV_BO_SYNC_STATE_RESET :
ANV_BO_SYNC_STATE_SUBMITTED;
bo_sync->bo = anv_bo_ref(mem->bo);
*sync_out = &bo_sync->sync;
return VK_SUCCESS;
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,405 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "common/intel_defines.h"
#include "common/intel_gem.h"
/**
* Wrapper around DRM_IOCTL_I915_GEM_CREATE.
*
* Return gem handle, or 0 on failure. Gem handles are never 0.
*/
uint32_t
anv_gem_create(struct anv_device *device, uint64_t size)
{
struct drm_i915_gem_create gem_create = {
.size = size,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
if (ret != 0) {
/* FIXME: What do we do if this fails? */
return 0;
}
return gem_create.handle;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
struct drm_gem_close close = {
.handle = gem_handle,
};
intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
/* Check for invalid flags */
assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
struct drm_i915_gem_create_ext_memory_regions ext_regions = {
.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
.num_regions = num_regions,
.regions = (uintptr_t)regions,
};
struct drm_i915_gem_create_ext gem_create = {
.size = anv_bo_size,
.extensions = (uintptr_t) &ext_regions,
.flags = flags,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,
&gem_create);
if (ret != 0) {
return 0;
}
return gem_create.handle;
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
*/
static void*
anv_gem_mmap_offset(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
struct drm_i915_gem_mmap_offset gem_mmap = {
.handle = gem_handle,
.flags = device->info->has_local_mem ? I915_MMAP_OFFSET_FIXED :
(flags & I915_MMAP_WC) ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
};
assert(offset == 0);
/* Get the fake offset back */
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_mmap);
if (ret != 0)
return MAP_FAILED;
/* And map it */
void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
device->fd, gem_mmap.offset);
return map;
}
static void*
anv_gem_mmap_legacy(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
assert(!device->info->has_local_mem);
struct drm_i915_gem_mmap gem_mmap = {
.handle = gem_handle,
.offset = offset,
.size = size,
.flags = flags,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
if (ret != 0)
return MAP_FAILED;
return (void *)(uintptr_t) gem_mmap.addr_ptr;
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
*/
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
void *map;
if (device->physical->has_mmap_offset)
map = anv_gem_mmap_offset(device, gem_handle, offset, size, flags);
else
map = anv_gem_mmap_legacy(device, gem_handle, offset, size, flags);
if (map != MAP_FAILED)
VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1));
return map;
}
/* This is just a wrapper around munmap, but it also notifies valgrind that
* this map is no longer valid. Pair this with anv_gem_mmap().
*/
void
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
{
VG(VALGRIND_FREELIKE_BLOCK(p, 0));
munmap(p, size);
}
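/* Illustrative sketch, not part of the original change: typical lifetime of a
 * GEM allocation using the wrappers above. The example function name is
 * hypothetical and error handling is kept minimal.
 */
static void UNUSED
anv_gem_example_roundtrip(struct anv_device *device)
{
   uint32_t handle = anv_gem_create(device, 4096);
   if (handle == 0)
      return; /* allocation failed; valid handles are never 0 */

   void *map = anv_gem_mmap(device, handle, 0, 4096, 0 /* flags */);
   if (map != MAP_FAILED) {
      memset(map, 0, 4096);              /* CPU writes through the mapping */
      anv_gem_munmap(device, map, 4096); /* pairs with anv_gem_mmap() */
   }

   anv_gem_close(device, handle);
}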
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
struct drm_i915_gem_userptr userptr = {
.user_ptr = (__u64)((unsigned long) mem),
.user_size = size,
.flags = 0,
};
if (device->physical->has_userptr_probe)
userptr.flags |= I915_USERPTR_PROBE;
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
if (ret == -1)
return 0;
return userptr.handle;
}
int
anv_gem_set_caching(struct anv_device *device,
uint32_t gem_handle, uint32_t caching)
{
struct drm_i915_gem_caching gem_caching = {
.handle = gem_handle,
.caching = caching,
};
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching);
}
/**
* On error, \a timeout_ns holds the remaining time.
*/
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
struct drm_i915_gem_wait wait = {
.bo_handle = gem_handle,
.timeout_ns = *timeout_ns,
.flags = 0,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
*timeout_ns = wait.timeout_ns;
return ret;
}
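/* Illustrative sketch, not part of the original change: a bounded wait on a
 * BO using the wrapper above; on failure the remaining budget is left in
 * 'timeout'. The example function name is hypothetical.
 */
static bool UNUSED
anv_gem_example_wait_1ms(struct anv_device *device, uint32_t gem_handle)
{
   int64_t timeout = 1000000; /* 1 ms in nanoseconds */
   if (anv_gem_wait(device, gem_handle, &timeout) != 0) {
      /* Still busy or an error occurred; 'timeout' holds what is left of the
       * 1 ms budget. */
      return false;
   }
   return true; /* the BO is idle */
}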
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
if (execbuf->flags & I915_EXEC_FENCE_OUT)
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
else
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}
/** Return -1 on error. */
int
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
{
if (!device->info->has_tiling_uapi)
return -1;
struct drm_i915_gem_get_tiling get_tiling = {
.handle = gem_handle,
};
/* FIXME: On discrete platforms we don't have DRM_IOCTL_I915_GEM_GET_TILING
* anymore, so we will need another way to get the tiling. Apparently this
* is only used in Android code, so we may need some other way to
* communicate the tiling mode.
*/
if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) {
assert(!"Failed to get BO tiling");
return -1;
}
return get_tiling.tiling_mode;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
int ret;
/* On discrete platforms we don't have DRM_IOCTL_I915_GEM_SET_TILING. So
* nothing needs to be done.
*/
if (!device->info->has_tiling_uapi)
return 0;
/* set_tiling overwrites the input on the error path, so we have to open
* code intel_ioctl.
*/
do {
struct drm_i915_gem_set_tiling set_tiling = {
.handle = gem_handle,
.tiling_mode = tiling,
.stride = stride,
};
ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
return ret;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
int tmp;
drm_i915_getparam_t gp = {
.param = param,
.value = &tmp,
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
if (ret == 0)
return tmp;
return 0;
}
bool
anv_gem_has_context_priority(int fd, int priority)
{
return !anv_gem_set_context_param(fd, 0, I915_CONTEXT_PARAM_PRIORITY,
priority);
}
int
anv_gem_create_context(struct anv_device *device)
{
struct drm_i915_gem_context_create create = { 0 };
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
if (ret == -1)
return -1;
return create.ctx_id;
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
struct drm_i915_gem_context_destroy destroy = {
.ctx_id = context,
};
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
}
int
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
{
struct drm_i915_gem_context_param p = {
.ctx_id = context,
.param = param,
.value = value,
};
int err = 0;
if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
err = -errno;
return err;
}
int
anv_gem_context_get_reset_stats(int fd, int context,
uint32_t *active, uint32_t *pending)
{
struct drm_i915_reset_stats stats = {
.ctx_id = context,
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
if (ret == 0) {
*active = stats.batch_active;
*pending = stats.batch_pending;
}
return ret;
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
struct drm_prime_handle args = {
.handle = gem_handle,
.flags = DRM_CLOEXEC | DRM_RDWR,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
return args.fd;
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
struct drm_prime_handle args = {
.fd = fd,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
if (ret == -1)
return 0;
return args.handle;
}
int
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
{
struct drm_i915_reg_read args = {
.offset = offset
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_REG_READ, &args);
*result = args.val;
return ret;
}
struct drm_i915_query_engine_info *
anv_gem_get_engine_info(int fd)
{
return intel_i915_query_alloc(fd, DRM_I915_QUERY_ENGINE_INFO, NULL);
}


@@ -0,0 +1,187 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/mman.h>
#include <sys/syscall.h>
#include "util/anon_file.h"
#include "anv_private.h"
uint32_t
anv_gem_create(struct anv_device *device, uint64_t size)
{
int fd = os_create_anonymous_file(size, "fake bo");
if (fd == -1)
return 0;
assert(fd != 0);
return fd;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
close(gem_handle);
}
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
return 0;
}
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
/* Ignore flags, as they're specific to I915_GEM_MMAP. */
(void) flags;
return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
gem_handle, offset);
}
/* This is just a wrapper around munmap. The stub mapping comes straight from
 * mmap(), so there is nothing else to tear down. Pair this with anv_gem_mmap().
 */
void
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
{
munmap(p, size);
}
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
int fd = os_create_anonymous_file(size, "fake bo");
if (fd == -1)
return 0;
assert(fd != 0);
return fd;
}
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
return 0;
}
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
return 0;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
return 0;
}
int
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
{
return 0;
}
int
anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
uint32_t caching)
{
return 0;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
unreachable("Unused");
}
int
anv_gem_create_context(struct anv_device *device)
{
unreachable("Unused");
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
unreachable("Unused");
}
int
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
{
unreachable("Unused");
}
bool
anv_gem_has_context_priority(int fd, int priority)
{
unreachable("Unused");
}
int
anv_gem_context_get_reset_stats(int fd, int context,
uint32_t *active, uint32_t *pending)
{
unreachable("Unused");
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
unreachable("Unused");
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
unreachable("Unused");
}
int
anv_i915_query(int fd, uint64_t query_id, void *buffer,
int32_t *buffer_len)
{
unreachable("Unused");
}
struct drm_i915_query_engine_info *
anv_gem_get_engine_info(int fd)
{
unreachable("Unused");
}
int
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
{
unreachable("Unused");
}

View File

@ -0,0 +1,180 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/*
* NOTE: The header can be included multiple times, from the same file.
*/
/*
* Gen-specific function declarations. This header must *not* be included
* directly. Instead, it is included multiple times by anv_private.h.
*
* In this header file, the usual genx() macro is available.
*/
#ifndef ANV_PRIVATE_H
#error This file is included by means other than anv_private.h
#endif
struct intel_sample_positions;
typedef struct VkRenderingSelfDependencyInfoMESA VkRenderingSelfDependencyInfoMESA;
extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[];
extern const uint32_t genX(vk_to_intel_primitive_type)[];
extern const uint32_t genX(vk_to_intel_compare_op)[];
extern const uint32_t genX(vk_to_intel_stencil_op)[];
extern const uint32_t genX(vk_to_intel_logic_op)[];
void genX(init_physical_device_state)(struct anv_physical_device *device);
VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
const struct isl_surf *surf);
void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
int vb_index,
struct anv_address vb_address,
uint32_t vb_size);
void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
uint32_t access_type,
uint64_t vb_used);
void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
unsigned width, unsigned height,
unsigned scale);
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
struct anv_device *device,
uint32_t current_pipeline,
enum anv_pipe_bits bits);
void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_device *device,
struct anv_batch *batch);
void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state);
void genX(emit_so_memcpy)(struct anv_memcpy_state *state,
struct anv_address dst, struct anv_address src,
uint32_t size);
void genX(emit_l3_config)(struct anv_batch *batch,
const struct anv_device *device,
const struct intel_l3_config *cfg);
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_l3_config *cfg);
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
bool enable);
void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageAspectFlagBits aspect,
enum isl_aux_usage aux_usage,
uint32_t level,
uint32_t base_layer,
uint32_t layer_count);
void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
struct anv_state genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
enum intel_urb_deref_block_size *deref_block_size);
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const struct vk_sample_locations_state *sl);
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct vk_sample_locations_state *sl);
void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
const struct vk_fragment_shading_rate_state *fsr);
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address addr,
bool end_of_pipe);
void
genX(rasterization_mode)(VkPolygonMode raster_mode,
VkLineRasterizationModeEXT line_mode,
float line_width,
uint32_t *api_mode,
bool *msaa_rasterization_enable);
uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
VkPolygonMode raster_mode);
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
VkPrimitiveTopology primitive_topology);
void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state);
void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);
void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);

File diff suppressed because it is too large

View File

@ -0,0 +1,516 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "anv_measure.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "common/intel_measure.h"
#include "util/debug.h"
struct anv_measure_batch {
struct anv_bo *bo;
struct intel_measure_batch base;
};
void
anv_measure_device_init(struct anv_physical_device *device)
{
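/* verx10 is 10 * major + minor, e.g. 75 is Haswell (gfx7.5), 125 is gfx12.5. */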
switch (device->info.verx10) {
case 125:
device->cmd_emit_timestamp = &gfx125_cmd_emit_timestamp;
break;
case 120:
device->cmd_emit_timestamp = &gfx12_cmd_emit_timestamp;
break;
case 110:
device->cmd_emit_timestamp = &gfx11_cmd_emit_timestamp;
break;
case 90:
device->cmd_emit_timestamp = &gfx9_cmd_emit_timestamp;
break;
case 80:
device->cmd_emit_timestamp = &gfx8_cmd_emit_timestamp;
break;
case 75:
device->cmd_emit_timestamp = &gfx75_cmd_emit_timestamp;
break;
case 70:
device->cmd_emit_timestamp = &gfx7_cmd_emit_timestamp;
break;
default:
assert(false);
}
/* initialise list of measure structures that await rendering */
struct intel_measure_device *measure_device = &device->measure_device;
intel_measure_init(measure_device);
struct intel_measure_config *config = measure_device->config;
if (config == NULL)
return;
/* the final member of intel_measure_ringbuffer is a zero-length array of
* intel_measure_buffered_result objects. Allocate additional space for
* the buffered objects based on the run-time configurable buffer_size
*/
const size_t rb_bytes = sizeof(struct intel_measure_ringbuffer) +
config->buffer_size * sizeof(struct intel_measure_buffered_result);
struct intel_measure_ringbuffer * rb =
vk_zalloc(&device->instance->vk.alloc,
rb_bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
measure_device->ringbuffer = rb;
}
static struct intel_measure_config*
config_from_command_buffer(struct anv_cmd_buffer *cmd_buffer)
{
return cmd_buffer->device->physical->measure_device.config;
}
void
anv_measure_init(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_device *device = cmd_buffer->device;
if (!config || !config->enabled) {
cmd_buffer->measure = NULL;
return;
}
/* the final member of anv_measure is a zero-length array of
* intel_measure_snapshot objects. Create additional space for the
* snapshot objects based on the run-time configurable batch_size
*/
const size_t batch_bytes = sizeof(struct anv_measure_batch) +
config->batch_size * sizeof(struct intel_measure_snapshot);
struct anv_measure_batch * measure =
vk_alloc(&cmd_buffer->vk.pool->alloc,
batch_bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
memset(measure, 0, batch_bytes);
ASSERTED VkResult result =
anv_device_alloc_bo(device, "measure data",
config->batch_size * sizeof(uint64_t),
ANV_BO_ALLOC_MAPPED,
0,
(struct anv_bo**)&measure->bo);
measure->base.timestamps = measure->bo->map;
assert(result == VK_SUCCESS);
cmd_buffer->measure = measure;
}
static void
anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count)
{
struct anv_batch *batch = &cmd_buffer->batch;
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_physical_device *device = cmd_buffer->device->physical;
struct intel_measure_device *measure_device = &device->measure_device;
const unsigned device_frame = measure_device->frame;
/* if the command buffer is not associated with a frame, associate it with
* the most recently acquired frame
*/
if (measure->base.frame == 0)
measure->base.frame = device_frame;
// uintptr_t framebuffer = (uintptr_t)cmd_buffer->state.framebuffer;
//
// if (!measure->base.framebuffer &&
// cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
// /* secondary command buffer inherited the framebuffer from the primary */
// measure->base.framebuffer = framebuffer;
//
// /* verify framebuffer has been properly tracked */
// assert(type == INTEL_SNAPSHOT_END ||
// framebuffer == measure->base.framebuffer ||
// framebuffer == 0 ); /* compute has no framebuffer */
unsigned index = measure->base.index++;
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
(struct anv_address) {
.bo = measure->bo,
.offset = index * sizeof(uint64_t) },
true /* end_of_pipe */);
if (event_name == NULL)
event_name = intel_measure_snapshot_string(type);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
memset(snapshot, 0, sizeof(*snapshot));
snapshot->type = type;
snapshot->count = (unsigned) count;
snapshot->event_count = measure->base.event_count;
snapshot->event_name = event_name;
// snapshot->framebuffer = framebuffer;
if (type == INTEL_SNAPSHOT_COMPUTE && cmd_buffer->state.compute.pipeline) {
snapshot->cs = (uintptr_t) cmd_buffer->state.compute.pipeline->cs;
} else if (cmd_buffer->state.gfx.pipeline) {
const struct anv_graphics_pipeline *pipeline =
cmd_buffer->state.gfx.pipeline;
snapshot->vs = (uintptr_t) pipeline->shaders[MESA_SHADER_VERTEX];
snapshot->tcs = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_CTRL];
snapshot->tes = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_EVAL];
snapshot->gs = (uintptr_t) pipeline->shaders[MESA_SHADER_GEOMETRY];
snapshot->fs = (uintptr_t) pipeline->shaders[MESA_SHADER_FRAGMENT];
}
}
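/* Timestamps are written in start/end pairs: a start snapshot takes an even
 * index and the matching end snapshot the following odd one, which is what
 * the index % 2 checks in this file rely on.
 */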
static void
anv_measure_end_snapshot(struct anv_cmd_buffer *cmd_buffer,
uint32_t event_count)
{
struct anv_batch *batch = &cmd_buffer->batch;
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_physical_device *device = cmd_buffer->device->physical;
unsigned index = measure->base.index++;
assert(index % 2 == 1);
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
(struct anv_address) {
.bo = measure->bo,
.offset = index * sizeof(uint64_t) },
true /* end_of_pipe */);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
memset(snapshot, 0, sizeof(*snapshot));
snapshot->type = INTEL_SNAPSHOT_END;
snapshot->event_count = event_count;
}
static bool
state_changed(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type)
{
uintptr_t vs=0, tcs=0, tes=0, gs=0, fs=0, cs=0;
if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)
/* can't record timestamps in this mode */
return false;
if (type == INTEL_SNAPSHOT_COMPUTE) {
const struct anv_compute_pipeline *cs_pipe =
cmd_buffer->state.compute.pipeline;
assert(cs_pipe);
cs = (uintptr_t)cs_pipe->cs;
} else if (type == INTEL_SNAPSHOT_DRAW) {
const struct anv_graphics_pipeline *gfx = cmd_buffer->state.gfx.pipeline;
assert(gfx);
vs = (uintptr_t) gfx->shaders[MESA_SHADER_VERTEX];
tcs = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_CTRL];
tes = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_EVAL];
gs = (uintptr_t) gfx->shaders[MESA_SHADER_GEOMETRY];
fs = (uintptr_t) gfx->shaders[MESA_SHADER_FRAGMENT];
}
/* else blorp, all programs NULL */
return intel_measure_state_changed(&cmd_buffer->measure->base,
vs, tcs, tes, gs, fs, cs);
}
void
_anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
assert(config);
if (measure == NULL)
return;
assert(type != INTEL_SNAPSHOT_END);
if (!state_changed(cmd_buffer, type)) {
/* filter out this event */
return;
}
/* increment event count */
++measure->base.event_count;
if (measure->base.event_count == 1 ||
measure->base.event_count == config->event_interval + 1) {
/* the first event of an interval */
if (measure->base.index % 2) {
/* end the previous event */
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count - 1);
}
measure->base.event_count = 1;
if (measure->base.index == config->batch_size) {
/* Snapshot buffer is full. The batch must be flushed before
* additional snapshots can be taken.
*/
static bool warned = false;
if (unlikely(!warned)) {
fprintf(config->file,
"WARNING: batch size exceeds INTEL_MEASURE limit: %d. "
"Data has been dropped. "
"Increase setting with INTEL_MEASURE=batch_size={count}\n",
config->batch_size);
}
warned = true;
return;
}
anv_measure_start_snapshot(cmd_buffer, type, event_name, count);
}
}
/**
* Called when a command buffer is reset. Re-initializes existing anv_measure
* data structures.
*/
void
anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_device *device = cmd_buffer->device;
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (!config->enabled) {
cmd_buffer->measure = NULL;
return;
}
if (!measure) {
/* Capture has recently been enabled. Instead of resetting, a new data
* structure must be allocated and initialized.
*/
return anv_measure_init(cmd_buffer);
}
/* it is possible that the command buffer contains snapshots that have not
* yet been processed
*/
intel_measure_gather(&device->physical->measure_device,
device->info);
assert(cmd_buffer->device != NULL);
measure->base.index = 0;
// measure->base.framebuffer = 0;
measure->base.frame = 0;
measure->base.event_count = 0;
list_inithead(&measure->base.link);
}
void
anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_device *device = cmd_buffer->device;
struct anv_physical_device *physical = device->physical;
if (!config)
return;
if (measure == NULL)
return;
/* it is possible that the command buffer contains snapshots that have not
* yet been processed
*/
intel_measure_gather(&physical->measure_device, &physical->info);
anv_device_release_bo(device, measure->bo);
vk_free(&cmd_buffer->vk.pool->alloc, measure);
cmd_buffer->measure = NULL;
}
static struct intel_measure_config*
config_from_device(struct anv_device *device)
{
return device->physical->measure_device.config;
}
void
anv_measure_device_destroy(struct anv_physical_device *device)
{
struct intel_measure_device *measure_device = &device->measure_device;
struct intel_measure_config *config = measure_device->config;
if (!config)
return;
if (measure_device->ringbuffer != NULL) {
vk_free(&device->instance->vk.alloc, measure_device->ringbuffer);
measure_device->ringbuffer = NULL;
}
}
/**
* Hook for command buffer submission.
*/
void
_anv_measure_submit(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
struct intel_measure_device *measure_device = &cmd_buffer->device->physical->measure_device;
if (!config)
return;
if (measure == NULL)
return;
struct intel_measure_batch *base = &measure->base;
if (base->index == 0)
/* no snapshots were started */
return;
/* finalize snapshots and enqueue them */
static unsigned cmd_buffer_count = 0;
base->batch_count = p_atomic_inc_return(&cmd_buffer_count);
if (base->index % 2 == 1) {
anv_measure_end_snapshot(cmd_buffer, base->event_count);
base->event_count = 0;
}
/* Mark the final timestamp as 'not completed'. This marker will be used
* to verify that rendering is complete.
*/
base->timestamps[base->index - 1] = 0;
/* add to the list of submitted snapshots */
pthread_mutex_lock(&measure_device->mutex);
list_addtail(&measure->base.link, &measure_device->queued_snapshots);
pthread_mutex_unlock(&measure_device->mutex);
}
/**
* Hook for the start of a frame.
*/
void
_anv_measure_acquire(struct anv_device *device)
{
struct intel_measure_config *config = config_from_device(device);
struct intel_measure_device *measure_device = &device->physical->measure_device;
if (!config)
return;
if (measure_device == NULL)
return;
intel_measure_frame_transition(p_atomic_inc_return(&measure_device->frame));
/* iterate the queued snapshots and publish those that finished */
intel_measure_gather(measure_device, &device->physical->info);
}
void
_anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (measure == NULL)
return;
if (measure->base.index % 2 == 0)
return;
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count);
measure->base.event_count = 0;
}
void
_anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (measure == NULL)
return;
// if (measure->base.framebuffer == (uintptr_t) cmd_buffer->state.framebuffer)
// /* no change */
// return;
bool filtering = (config->flags & (INTEL_MEASURE_RENDERPASS |
INTEL_MEASURE_SHADER));
if (filtering && measure->base.index % 2 == 1) {
/* snapshot for previous renderpass was not ended */
anv_measure_end_snapshot(cmd_buffer,
measure->base.event_count);
measure->base.event_count = 0;
}
// measure->base.framebuffer = (uintptr_t) cmd_buffer->state.framebuffer;
}
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary)
{
struct intel_measure_config *config = config_from_command_buffer(primary);
struct anv_measure_batch *measure = primary->measure;
if (!config)
return;
if (measure == NULL)
return;
if (config->flags & (INTEL_MEASURE_BATCH | INTEL_MEASURE_FRAME))
/* secondary timing will be contained within the primary */
return;
if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
static bool warned = false;
if (unlikely(!warned)) {
fprintf(config->file,
"WARNING: INTEL_MEASURE cannot capture timings of commands "
"in secondary command buffers with "
"VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT set.\n");
}
return;
}
if (measure->base.index % 2 == 1)
anv_measure_end_snapshot(primary, measure->base.event_count);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[measure->base.index]);
_anv_measure_snapshot(primary, INTEL_SNAPSHOT_SECONDARY_BATCH, NULL, 0);
snapshot->secondary = &secondary->measure->base;
}

View File

@ -0,0 +1,82 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ANV_MEASURE_H
#define ANV_MEASURE_H
#include "anv_private.h"
#include "common/intel_measure.h"
void anv_measure_device_init(struct anv_physical_device *device);
void anv_measure_device_destroy(struct anv_physical_device *device);
void anv_measure_init(struct anv_cmd_buffer *cmd_buffer);
void anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer);
void anv_measure_reset(struct anv_cmd_buffer *cmd_buffer);
void _anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count);
/* ends snapshots before command buffer submission */
void _anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer);
/* when measuring render passes, inserts a timestamp */
void _anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer);
/* tracks frame progression */
void _anv_measure_acquire(struct anv_device *device);
/* called at submission; finalizes any open snapshot and queues the batch
 * (pair with endcommandbuffer) */
void _anv_measure_submit(struct anv_cmd_buffer *cmd_buffer);
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary);
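/* These wrappers make the hooks nearly free when INTEL_MEASURE is disabled:
 * the _anv_measure_*() functions are only called when a measurement config or
 * measure struct is actually present.
 */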
#define anv_measure_acquire(device) \
if (unlikely(device->physical->measure_device.config)) \
_anv_measure_acquire(device)
#define anv_measure_snapshot(cmd_buffer, type, event_name, count) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_snapshot(cmd_buffer, type, event_name, count)
#define anv_measure_endcommandbuffer(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_endcommandbuffer(cmd_buffer)
#define anv_measure_beginrenderpass(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_beginrenderpass(cmd_buffer)
#define anv_measure_submit(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_submit(cmd_buffer)
#define anv_measure_add_secondary(primary, secondary) \
if (unlikely(primary->measure)) \
_anv_measure_add_secondary(primary, secondary)
#endif /* ANV_MEASURE_H */

View File

@ -0,0 +1,97 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef ANV_NIR_H
#define ANV_NIR_H
#include "nir/nir.h"
#include "anv_private.h"
#ifdef __cplusplus
extern "C" {
#endif
bool anv_check_for_primitive_replication(struct anv_device *device,
VkShaderStageFlags stages,
nir_shader **shaders,
uint32_t view_mask);
bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
bool use_primitive_replication);
bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
const struct anv_pipeline_layout *layout);
static inline nir_address_format
anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
bool robust_buffer_access)
{
if (pdevice->has_a64_buffer_access) {
if (robust_buffer_access)
return nir_address_format_64bit_bounded_global;
else
return nir_address_format_64bit_global_32bit_offset;
} else {
return nir_address_format_32bit_index_offset;
}
}
static inline nir_address_format
anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
bool robust_buffer_access)
{
if (pdevice->has_a64_buffer_access) {
if (robust_buffer_access)
return nir_address_format_64bit_bounded_global;
else
return nir_address_format_64bit_global_32bit_offset;
} else {
return nir_address_format_32bit_index_offset;
}
}
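/* Note: the UBO and SSBO helpers above currently select identical address
 * formats; they are simply split per descriptor type.
 */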
bool anv_nir_lower_ubo_loads(nir_shader *shader);
void anv_nir_apply_pipeline_layout(nir_shader *shader,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
const struct anv_pipeline_layout *layout,
struct anv_pipeline_bind_map *map);
void anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx);
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
bool anv_nir_add_base_work_group_id(nir_shader *shader);
#ifdef __cplusplus
}
#endif
#endif /* ANV_NIR_H */

View File

@ -0,0 +1,63 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "compiler/brw_compiler.h"
static bool
anv_nir_add_base_work_group_id_instr(nir_builder *b,
nir_instr *instr,
UNUSED void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *load_id = nir_instr_as_intrinsic(instr);
if (load_id->intrinsic != nir_intrinsic_load_workgroup_id)
return false;
b->cursor = nir_after_instr(&load_id->instr);
nir_ssa_def *load_base =
nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
.base = offsetof(struct anv_push_constants, cs.base_work_group_id),
.range = 3 * sizeof(uint32_t));
nir_ssa_def *id = nir_iadd(b, &load_id->dest.ssa, load_base);
nir_ssa_def_rewrite_uses_after(&load_id->dest.ssa, id, id->parent_instr);
return true;
}
bool
anv_nir_add_base_work_group_id(nir_shader *shader)
{
assert(shader->info.stage == MESA_SHADER_COMPUTE);
return nir_shader_instructions_pass(shader,
anv_nir_add_base_work_group_id_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,290 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"
#define sizeof_field(type, field) sizeof(((type *)0)->field)
void
anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx)
{
const struct brw_compiler *compiler = pdevice->compiler;
const struct intel_device_info *devinfo = compiler->devinfo;
memset(map->push_ranges, 0, sizeof(map->push_ranges));
bool has_const_ubo = false;
unsigned push_start = UINT_MAX, push_end = 0;
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_ubo:
if (nir_src_is_const(intrin->src[0]) &&
nir_src_is_const(intrin->src[1]))
has_const_ubo = true;
break;
case nir_intrinsic_load_push_constant: {
unsigned base = nir_intrinsic_base(intrin);
unsigned range = nir_intrinsic_range(intrin);
push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base + range);
break;
}
case nir_intrinsic_load_desc_set_address_intel:
push_start = MIN2(push_start,
offsetof(struct anv_push_constants, desc_sets));
push_end = MAX2(push_end, push_start +
sizeof_field(struct anv_push_constants, desc_sets));
break;
default:
break;
}
}
}
}
const bool has_push_intrinsic = push_start <= push_end;
const bool push_ubo_ranges =
pdevice->info.verx10 >= 75 &&
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
!brw_shader_stage_requires_bindless_resources(nir->info.stage);
if (push_ubo_ranges && robust_buffer_access) {
/* We can't on-the-fly adjust our push ranges because doing so would
* mess up the layout in the shader. When robustBufferAccess is
* enabled, we push a mask into the shader indicating which pushed
* registers are valid and we zero out the invalid ones at the top of
* the shader.
*/
const uint32_t push_reg_mask_start =
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
const uint32_t push_reg_mask_end = push_reg_mask_start + sizeof(uint64_t);
push_start = MIN2(push_start, push_reg_mask_start);
push_end = MAX2(push_end, push_reg_mask_end);
}
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
* constant slot. Yes, there is an off-by-one error here but that's
* because the back-end will add it so we want to claim the number of
* push constants one dword less than the full amount including
* gl_SubgroupId.
*/
assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
}
/* Align push_start down to a 32B boundary and make it no larger than
* push_end (no push constants is indicated by push_start = UINT_MAX).
*/
push_start = MIN2(push_start, push_end);
push_start = align_down_u32(push_start, 32);
/* For vec4 our push data size needs to be aligned to a vec4 and for
* scalar, it needs to be aligned to a DWORD.
*/
const unsigned align = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
nir->num_uniforms = ALIGN(push_end - push_start, align);
prog_data->nr_params = nir->num_uniforms / 4;
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
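/* Push ranges are expressed in 32-byte (one GRF) units. */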
struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
.start = push_start / 32,
.length = DIV_ROUND_UP(push_end - push_start, 32),
};
if (has_push_intrinsic) {
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_builder build, *b = &build;
nir_builder_init(b, function->impl);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_push_constant: {
/* With bindless shaders we load uniforms with SEND
* messages. All the push constants are located after the
* RT_DISPATCH_GLOBALS. We just need to add the offset to
* the address right after RT_DISPATCH_GLOBALS (see
* brw_nir_lower_rt_intrinsics.c).
*/
unsigned base_offset =
brw_shader_stage_requires_bindless_resources(nir->info.stage) ? 0 : push_start;
intrin->intrinsic = nir_intrinsic_load_uniform;
nir_intrinsic_set_base(intrin,
nir_intrinsic_base(intrin) -
base_offset);
break;
}
case nir_intrinsic_load_desc_set_address_intel: {
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
.base = offsetof(struct anv_push_constants, desc_sets),
.range = sizeof_field(struct anv_push_constants, desc_sets),
.dest_type = nir_type_uint64);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
break;
}
default:
break;
}
}
}
}
}
if (push_ubo_ranges) {
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
/* The vec4 back-end pushes at most 32 regs while the scalar back-end
* pushes up to 64. This is primarily because the scalar back-end has a
* massively more competent register allocator and so the risk of
* spilling due to UBO pushing isn't nearly as high.
*/
const unsigned max_push_regs =
compiler->scalar_stage[nir->info.stage] ? 64 : 32;
unsigned total_push_regs = push_constant_range.length;
for (unsigned i = 0; i < 4; i++) {
if (total_push_regs + prog_data->ubo_ranges[i].length > max_push_regs)
prog_data->ubo_ranges[i].length = max_push_regs - total_push_regs;
total_push_regs += prog_data->ubo_ranges[i].length;
}
assert(total_push_regs <= max_push_regs);
int n = 0;
if (push_constant_range.length > 0)
map->push_ranges[n++] = push_constant_range;
if (robust_buffer_access) {
const uint32_t push_reg_mask_offset =
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
assert(push_reg_mask_offset >= push_start);
prog_data->push_reg_mask_param =
(push_reg_mask_offset - push_start) / 4;
}
unsigned range_start_reg = push_constant_range.length;
for (int i = 0; i < 4; i++) {
struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
if (ubo_range->length == 0)
continue;
if (n >= 4 || (n == 3 && compiler->constant_buffer_0_is_relative)) {
memset(ubo_range, 0, sizeof(*ubo_range));
continue;
}
const struct anv_pipeline_binding *binding =
&map->surface_to_descriptor[ubo_range->block];
map->push_ranges[n++] = (struct anv_push_range) {
.set = binding->set,
.index = binding->index,
.dynamic_offset_index = binding->dynamic_offset_index,
.start = ubo_range->start,
.length = ubo_range->length,
};
/* We only bother to shader-zero pushed client UBOs */
if (binding->set < MAX_SETS && robust_buffer_access) {
prog_data->zero_push_reg |= BITFIELD64_RANGE(range_start_reg,
ubo_range->length);
}
range_start_reg += ubo_range->length;
}
} else {
/* For Ivy Bridge, the push constants packets have a different
* rule that would require us to iterate in the other direction
* and possibly mess around with dynamic state base address.
* Don't bother; just emit regular push constants at n = 0.
*
* In the compute case, we don't have multiple push ranges so it's
* better to just provide one in push_ranges[0].
*/
map->push_ranges[0] = push_constant_range;
}
/* Now that we're done computing the push constant portion of the
* bind map, hash it. This lets us quickly determine if the actual
* mapping has changed and not just a no-op pipeline change.
*/
_mesa_sha1_compute(map->push_ranges,
sizeof(map->push_ranges),
map->push_sha1);
}
void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
for (unsigned i = 0; i < 4; i++)
prog_data_push_size += prog_data->ubo_ranges[i].length;
unsigned bind_map_push_size = 0;
for (unsigned i = 0; i < 4; i++)
bind_map_push_size += map->push_ranges[i].length;
/* We could go through everything again but it should be enough to assert
* that they push the same number of registers. This should alert us if
* the back-end compiler decides to re-arrange stuff or shrink a range.
*/
assert(prog_data_push_size == bind_map_push_size);
#endif
}

View File

@ -0,0 +1,324 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "util/debug.h"
/**
* This file implements the lowering required for VK_KHR_multiview.
*
* When possible, Primitive Replication is used and the shader is modified to
* make gl_Position an array and fill it with values for each view.
*
* Otherwise we implement multiview using instanced rendering. The number of
* instances in each draw call is multiplied by the number of views in the
* subpass. Then, in the shader, we divide gl_InstanceId by the number of
* views and use gl_InstanceId % view_count to compute the actual ViewIndex.
*/
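/* For example, with two views an N-instance draw is emitted with 2*N hardware
 * instances; hardware instance i runs application instance i / 2 for compacted
 * view i % 2, remapped through the view mask when it is sparse.
 */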
struct lower_multiview_state {
nir_builder builder;
uint32_t view_mask;
nir_ssa_def *instance_id;
nir_ssa_def *view_index;
};
static nir_ssa_def *
build_instance_id(struct lower_multiview_state *state)
{
assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);
if (state->instance_id == NULL) {
nir_builder *b = &state->builder;
b->cursor = nir_before_block(nir_start_block(b->impl));
/* We use instancing for implementing multiview. The actual instance id
* is given by dividing instance_id by the number of views in this
* subpass.
*/
state->instance_id =
nir_idiv(b, nir_load_instance_id(b),
nir_imm_int(b, util_bitcount(state->view_mask)));
}
return state->instance_id;
}
static nir_ssa_def *
build_view_index(struct lower_multiview_state *state)
{
assert(state->builder.shader->info.stage != MESA_SHADER_FRAGMENT);
if (state->view_index == NULL) {
nir_builder *b = &state->builder;
b->cursor = nir_before_block(nir_start_block(b->impl));
assert(state->view_mask != 0);
if (util_bitcount(state->view_mask) == 1) {
/* Set the view index directly. */
state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
} else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
/* We only support 16 viewports */
assert((state->view_mask & 0xffff0000) == 0);
/* We use instancing for implementing multiview. The compacted view
* id is given by instance_id % view_count. We then have to convert
* that to an actual view id.
*/
nir_ssa_def *compacted =
nir_umod(b, nir_load_instance_id(b),
nir_imm_int(b, util_bitcount(state->view_mask)));
if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
/* If we have a full view mask, then compacted is what we want */
state->view_index = compacted;
} else {
/* Now we define a map from compacted view index to the actual
* view index that's based on the view_mask. The map is given by
* 16 nibbles, each of which is a value from 0 to 15.
*/
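/* Example: view_mask = 0b1101 (views 0, 2 and 3) gives remap = 0x320, so
 * compacted index 1 selects view 2.
 */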
uint64_t remap = 0;
uint32_t i = 0;
u_foreach_bit(bit, state->view_mask) {
assert(bit < 16);
remap |= (uint64_t)bit << (i++ * 4);
}
nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));
/* One of these days, when we have int64 everywhere, this will be
* easier.
*/
nir_ssa_def *shifted;
if (remap <= UINT32_MAX) {
shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
} else {
nir_ssa_def *shifted_low =
nir_ushr(b, nir_imm_int(b, remap), shift);
nir_ssa_def *shifted_high =
nir_ushr(b, nir_imm_int(b, remap >> 32),
nir_isub(b, shift, nir_imm_int(b, 32)));
shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
shifted_low, shifted_high);
}
state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
}
} else {
const struct glsl_type *type = glsl_int_type();
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
b->shader->info.stage == MESA_SHADER_GEOMETRY)
type = glsl_array_type(type, 1, 0);
nir_variable *idx_var =
nir_variable_create(b->shader, nir_var_shader_in,
type, "view index");
idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
idx_var->data.interpolation = INTERP_MODE_FLAT;
nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
if (glsl_type_is_array(type))
deref = nir_build_deref_array_imm(b, deref, 0);
state->view_index = nir_load_deref(b, deref);
}
}
return state->view_index;
}
static bool
is_load_view_index(const nir_instr *instr, const void *data)
{
return instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_view_index;
}
static nir_ssa_def *
replace_load_view_index_with_zero(struct nir_builder *b,
nir_instr *instr, void *data)
{
assert(is_load_view_index(instr, data));
return nir_imm_zero(b, 1, 32);
}
static nir_ssa_def *
replace_load_view_index_with_layer_id(struct nir_builder *b,
nir_instr *instr, void *data)
{
assert(is_load_view_index(instr, data));
return nir_load_layer_id(b);
}
bool
anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
bool use_primitive_replication)
{
assert(shader->info.stage != MESA_SHADER_COMPUTE);
/* If multiview isn't enabled, just lower the ViewIndex builtin to zero. */
if (view_mask == 0) {
return nir_shader_lower_instructions(shader, is_load_view_index,
replace_load_view_index_with_zero, NULL);
}
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
return nir_shader_lower_instructions(shader, is_load_view_index,
replace_load_view_index_with_layer_id, NULL);
}
/* This pass assumes a single entrypoint */
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
/* Primitive Replication allows a shader to write different positions for
* each view in the same execution. If only the position depends on the
* view, then it is possible to use the feature instead of instancing to
* implement multiview.
*/
if (use_primitive_replication) {
bool progress = nir_lower_multiview(shader, view_mask);
if (progress) {
nir_builder b;
nir_builder_init(&b, entrypoint);
b.cursor = nir_before_cf_list(&entrypoint->body);
/* Fill Layer ID with zero. Replication will use that as base to
* apply the RTAI offsets.
*/
nir_variable *layer_id_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "layer ID");
layer_id_out->data.location = VARYING_SLOT_LAYER;
nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);
}
return progress;
}
struct lower_multiview_state state = {
.view_mask = view_mask,
};
nir_builder_init(&state.builder, entrypoint);
nir_foreach_block(block, entrypoint) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
if (load->intrinsic != nir_intrinsic_load_instance_id &&
load->intrinsic != nir_intrinsic_load_view_index)
continue;
assert(load->dest.is_ssa);
nir_ssa_def *value;
if (load->intrinsic == nir_intrinsic_load_instance_id) {
value = build_instance_id(&state);
} else {
assert(load->intrinsic == nir_intrinsic_load_view_index);
value = build_view_index(&state);
}
nir_ssa_def_rewrite_uses(&load->dest.ssa, value);
nir_instr_remove(&load->instr);
}
}
/* The view index is available in all stages but the instance id is only
* available in the VS. If it's not a fragment shader, we need to pass
* the view index on to the next stage.
*/
nir_ssa_def *view_index = build_view_index(&state);
nir_builder *b = &state.builder;
assert(view_index->parent_instr->block == nir_start_block(entrypoint));
b->cursor = nir_after_instr(view_index->parent_instr);
/* Unless there is only one possible view index (that would be set
* directly), pass it to the next stage. */
if (util_bitcount(state.view_mask) != 1) {
nir_variable *view_index_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "view index");
view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
nir_store_var(b, view_index_out, view_index, 0x1);
}
nir_variable *layer_id_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "layer ID");
layer_id_out->data.location = VARYING_SLOT_LAYER;
nir_store_var(b, layer_id_out, view_index, 0x1);
nir_metadata_preserve(entrypoint, nir_metadata_block_index |
nir_metadata_dominance);
return true;
}
bool
anv_check_for_primitive_replication(struct anv_device *device,
VkShaderStageFlags stages,
nir_shader **shaders,
uint32_t view_mask)
{
assert(device->info->ver >= 12);
static int primitive_replication_max_views = -1;
if (primitive_replication_max_views < 0) {
/* TODO: Figure out why we are not getting the same benefits for more than
 * 2 views. For now, use Primitive Replication just for the 2-view case
* by default.
*/
const unsigned default_max_views = 2;
primitive_replication_max_views =
MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
default_max_views));
}
/* TODO: We should be able to support replication at 'geometry' stages
* later than Vertex. In that case only the last stage can refer to
* gl_ViewIndex.
*/
if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
return false;
int view_count = util_bitcount(view_mask);
if (view_count == 1 || view_count > primitive_replication_max_views)
return false;
return nir_can_lower_multiview(shaders[MESA_SHADER_VERTEX]);
}

View File

@ -0,0 +1,124 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir_builder.h"
static bool
lower_ubo_load_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
if (load->intrinsic != nir_intrinsic_load_global_constant_offset &&
load->intrinsic != nir_intrinsic_load_global_constant_bounded)
return false;
b->cursor = nir_before_instr(instr);
nir_ssa_def *base_addr = load->src[0].ssa;
nir_ssa_def *bound = NULL;
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded)
bound = load->src[2].ssa;
unsigned bit_size = load->dest.ssa.bit_size;
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
nir_ssa_def *val;
if (nir_src_is_const(load->src[1])) {
uint32_t offset = nir_src_as_uint(load->src[1]);
/* Things should be component-aligned. */
assert(offset % byte_size == 0);
assert(ANV_UBO_ALIGNMENT == 64);
unsigned suboffset = offset % 64;
uint64_t aligned_offset = offset - suboffset;
/* Load two just in case we go over a 64B boundary */
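/* (e.g. a 16-byte load at offset 56 needs bytes 56..71 and thus both blocks) */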
nir_ssa_def *data[2];
for (unsigned i = 0; i < 2; i++) {
nir_ssa_def *pred;
if (bound) {
pred = nir_ilt(b, nir_imm_int(b, aligned_offset + i * 64 + 63),
bound);
} else {
pred = nir_imm_true(b);
}
nir_ssa_def *addr = nir_iadd_imm(b, base_addr,
aligned_offset + i * 64);
data[i] = nir_load_global_const_block_intel(b, 16, addr, pred);
}
val = nir_extract_bits(b, data, 2, suboffset * 8,
load->num_components, bit_size);
} else {
nir_ssa_def *offset = load->src[1].ssa;
nir_ssa_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset));
if (bound) {
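/* Predicate the load and produce zero for out-of-bounds offsets, giving
 * robustBufferAccess-style behaviour.
 */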
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
unsigned load_size = byte_size * load->num_components;
nir_ssa_def *in_bounds =
nir_ilt(b, nir_iadd_imm(b, offset, load_size - 1), bound);
nir_push_if(b, in_bounds);
nir_ssa_def *load_val =
nir_build_load_global_constant(b, load->dest.ssa.num_components,
load->dest.ssa.bit_size, addr,
.access = nir_intrinsic_access(load),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
nir_pop_if(b, NULL);
val = nir_if_phi(b, load_val, zero);
} else {
val = nir_build_load_global_constant(b, load->dest.ssa.num_components,
load->dest.ssa.bit_size, addr,
.access = nir_intrinsic_access(load),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
}
}
nir_ssa_def_rewrite_uses(&load->dest.ssa, val);
nir_instr_remove(&load->instr);
return true;
}
bool
anv_nir_lower_ubo_loads(nir_shader *shader)
{
return nir_shader_instructions_pass(shader, lower_ubo_load_instr,
nir_metadata_none,
NULL);
}

View File

@ -0,0 +1,349 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "anv_private.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_vulkan.h"
struct ycbcr_state {
nir_builder *builder;
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
nir_deref_instr *tex_deref;
struct anv_ycbcr_conversion *conversion;
};
/* TODO: we should probably replace this with a push constant/uniform. */
static nir_ssa_def *
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
{
if (state->image_size)
return state->image_size;
nir_builder *b = state->builder;
const struct glsl_type *type = texture->type;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
tex->op = nir_texop_txs;
tex->sampler_dim = glsl_get_sampler_dim(type);
tex->is_array = glsl_sampler_type_is_array(type);
tex->is_shadow = glsl_sampler_type_is_shadow(type);
tex->dest_type = nir_type_int32;
tex->src[0].src_type = nir_tex_src_texture_deref;
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
nir_ssa_dest_init(&tex->instr, &tex->dest,
nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
state->image_size = nir_i2f32(b, &tex->dest.ssa);
return state->image_size;
}
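/* For cosited-even chroma, nudge the normalized coordinate by a fraction of
* a texel so that sampling the subsampled plane reconstructs the chroma
* value at the expected location.
*/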
static nir_ssa_def *
implicit_downsampled_coord(nir_builder *b,
nir_ssa_def *value,
nir_ssa_def *max_value,
int div_scale)
{
return nir_fadd(b,
value,
nir_fdiv(b,
nir_imm_float(b, 1.0f),
nir_fmul(b,
nir_imm_float(b, div_scale),
max_value)));
}
static nir_ssa_def *
implicit_downsampled_coords(struct ycbcr_state *state,
nir_ssa_def *old_coords,
const struct anv_format_plane *plane_format)
{
nir_builder *b = state->builder;
struct anv_ycbcr_conversion *conversion = state->conversion;
nir_ssa_def *image_size = get_texture_size(state, state->tex_deref);
nir_ssa_def *comp[4] = { NULL, };
int c;
for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) {
if (plane_format->denominator_scales[c] > 1 &&
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
comp[c] = implicit_downsampled_coord(b,
nir_channel(b, old_coords, c),
nir_channel(b, image_size, c),
plane_format->denominator_scales[c]);
} else {
comp[c] = nir_channel(b, old_coords, c);
}
}
/* Leave other coordinates untouched */
for (; c < old_coords->num_components; c++)
comp[c] = nir_channel(b, old_coords, c);
return nir_vec(b, comp, old_coords->num_components);
}
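/* Emit a copy of the original texture instruction that samples a single
* plane: the sources are duplicated, a nir_tex_src_plane source selects the
* plane, and coordinates are adjusted for subsampled chroma planes when
* needed.
*/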
static nir_ssa_def *
create_plane_tex_instr_implicit(struct ycbcr_state *state,
uint32_t plane)
{
nir_builder *b = state->builder;
struct anv_ycbcr_conversion *conversion = state->conversion;
const struct anv_format_plane *plane_format =
&conversion->format->planes[plane];
nir_tex_instr *old_tex = state->origin_tex;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
tex->src[i].src_type = old_tex->src[i].src_type;
switch (old_tex->src[i].src_type) {
case nir_tex_src_coord:
if (plane_format->has_chroma && conversion->chroma_reconstruction) {
assert(old_tex->src[i].src.is_ssa);
tex->src[i].src =
nir_src_for_ssa(implicit_downsampled_coords(state,
old_tex->src[i].src.ssa,
plane_format));
break;
}
FALLTHROUGH;
default:
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, &tex->instr);
break;
}
}
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
tex->sampler_dim = old_tex->sampler_dim;
tex->dest_type = old_tex->dest_type;
tex->op = old_tex->op;
tex->coord_components = old_tex->coord_components;
tex->is_new_style_shadow = old_tex->is_new_style_shadow;
tex->component = old_tex->component;
tex->texture_index = old_tex->texture_index;
tex->sampler_index = old_tex->sampler_index;
tex->is_array = old_tex->is_array;
nir_ssa_dest_init(&tex->instr, &tex->dest,
old_tex->dest.ssa.num_components,
nir_dest_bit_size(old_tex->dest), NULL);
nir_builder_instr_insert(b, &tex->instr);
return &tex->dest.ssa;
}
static unsigned
channel_to_component(enum isl_channel_select channel)
{
switch (channel) {
case ISL_CHANNEL_SELECT_RED:
return 0;
case ISL_CHANNEL_SELECT_GREEN:
return 1;
case ISL_CHANNEL_SELECT_BLUE:
return 2;
case ISL_CHANNEL_SELECT_ALPHA:
return 3;
default:
unreachable("invalid channel");
return 0;
}
}
static enum isl_channel_select
swizzle_channel(struct isl_swizzle swizzle, unsigned channel)
{
switch (channel) {
case 0:
return swizzle.r;
case 1:
return swizzle.g;
case 2:
return swizzle.b;
case 3:
return swizzle.a;
default:
unreachable("invalid channel");
return 0;
}
}
static bool
anv_nir_lower_ycbcr_textures_instr(nir_builder *builder,
nir_instr *instr,
void *cb_data)
{
const struct anv_pipeline_layout *layout = cb_data;
if (instr->type != nir_instr_type_tex)
return false;
nir_tex_instr *tex = nir_instr_as_tex(instr);
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
nir_variable *var = nir_deref_instr_get_variable(deref);
const struct anv_descriptor_set_layout *set_layout =
layout->set[var->data.descriptor_set].layout;
const struct anv_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
/* For the following instructions, we don't apply any change; the
* instruction simply applies to the first plane.
*/
if (tex->op == nir_texop_txs ||
tex->op == nir_texop_query_levels ||
tex->op == nir_texop_lod)
return false;
if (binding->immutable_samplers == NULL)
return false;
assert(tex->texture_index == 0);
unsigned array_index = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
if (!nir_src_is_const(deref->arr.index))
return false;
array_index = nir_src_as_uint(deref->arr.index);
array_index = MIN2(array_index, binding->array_size - 1);
}
const struct anv_sampler *sampler = binding->immutable_samplers[array_index];
if (sampler->conversion == NULL)
return false;
struct ycbcr_state state = {
.builder = builder,
.origin_tex = tex,
.tex_deref = deref,
.conversion = sampler->conversion,
};
builder->cursor = nir_before_instr(&tex->instr);
const struct anv_format *format = state.conversion->format;
const struct isl_format_layout *y_isl_layout = NULL;
for (uint32_t p = 0; p < format->n_planes; p++) {
if (!format->planes[p].has_chroma)
y_isl_layout = isl_format_get_layout(format->planes[p].isl_format);
}
assert(y_isl_layout != NULL);
uint8_t y_bpc = y_isl_layout->channels_array[0].bits;
/* |ycbcr_comp| holds components in the order: Cr-Y-Cb */
nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
nir_ssa_def *one = nir_imm_float(builder, 1.0f);
/* Use 2 extra channels for the following swizzle */
nir_ssa_def *ycbcr_comp[5] = { zero, zero, zero, one, zero };
uint8_t ycbcr_bpcs[5];
memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs));
/* Go through all the planes and gather the samples into a |ycbcr_comp|
* while applying a swizzle required by the spec:
*
* R, G, B should respectively map to Cr, Y, Cb
*/
for (uint32_t p = 0; p < format->n_planes; p++) {
const struct anv_format_plane *plane_format = &format->planes[p];
nir_ssa_def *plane_sample = create_plane_tex_instr_implicit(&state, p);
for (uint32_t pc = 0; pc < 4; pc++) {
enum isl_channel_select ycbcr_swizzle =
swizzle_channel(plane_format->ycbcr_swizzle, pc);
if (ycbcr_swizzle == ISL_CHANNEL_SELECT_ZERO)
continue;
unsigned ycbcr_component = channel_to_component(ycbcr_swizzle);
ycbcr_comp[ycbcr_component] = nir_channel(builder, plane_sample, pc);
/* Also compute the number of bits for each component. */
const struct isl_format_layout *isl_layout =
isl_format_get_layout(plane_format->isl_format);
ycbcr_bpcs[ycbcr_component] = isl_layout->channels_array[pc].bits;
}
}
/* Now remap components to the order specified by the conversion. */
nir_ssa_def *swizzled_comp[4] = { NULL, };
uint32_t swizzled_bpcs[4] = { 0, };
for (uint32_t i = 0; i < ARRAY_SIZE(state.conversion->mapping); i++) {
/* Maps to components in |ycbcr_comp| */
static const uint32_t swizzle_mapping[] = {
[VK_COMPONENT_SWIZZLE_ZERO] = 4,
[VK_COMPONENT_SWIZZLE_ONE] = 3,
[VK_COMPONENT_SWIZZLE_R] = 0,
[VK_COMPONENT_SWIZZLE_G] = 1,
[VK_COMPONENT_SWIZZLE_B] = 2,
[VK_COMPONENT_SWIZZLE_A] = 3,
};
const VkComponentSwizzle m = state.conversion->mapping[i];
if (m == VK_COMPONENT_SWIZZLE_IDENTITY) {
swizzled_comp[i] = ycbcr_comp[i];
swizzled_bpcs[i] = ycbcr_bpcs[i];
} else {
swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]];
swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]];
}
}
nir_ssa_def *result = nir_vec(builder, swizzled_comp, 4);
if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
result = nir_convert_ycbcr_to_rgb(builder,
state.conversion->ycbcr_model,
state.conversion->ycbcr_range,
result,
swizzled_bpcs);
}
nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
nir_instr_remove(&tex->instr);
return true;
}
bool
anv_nir_lower_ycbcr_textures(nir_shader *shader,
const struct anv_pipeline_layout *layout)
{
return nir_shader_instructions_pass(shader,
anv_nir_lower_ycbcr_textures_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)layout);
}

View File

@ -0,0 +1,488 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "anv_private.h"
#include "vk_util.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"
#include "util/mesa-sha1.h"
void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
const struct intel_device_info *devinfo = &device->info;
device->perf = NULL;
/* We need self-modifying batches. The i915 command parser prevents that on
* Gfx7.5 :( maybe one day.
*/
if (devinfo->ver < 8)
return;
struct intel_perf_config *perf = intel_perf_new(NULL);
intel_perf_init_metrics(perf, &device->info, fd,
false /* pipeline statistics */,
true /* register snapshots */);
if (!perf->n_queries)
goto err;
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
* perf revision 2.
*/
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (!intel_perf_has_hold_preemption(perf))
goto err;
}
device->perf = perf;
/* Compute the number of commands we need to implement a performance
* query.
*/
const struct intel_perf_query_field_layout *layout = &perf->query_layout;
device->n_perf_query_commands = 0;
for (uint32_t f = 0; f < layout->n_fields; f++) {
struct intel_perf_query_field *field = &layout->fields[f];
switch (field->type) {
case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
device->n_perf_query_commands++;
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
device->n_perf_query_commands += field->size / 4;
break;
default:
unreachable("Unhandled register type");
}
}
device->n_perf_query_commands *= 2; /* Begin & End */
device->n_perf_query_commands += 1; /* availability */
return;
err:
ralloc_free(perf);
}
void
anv_device_perf_init(struct anv_device *device)
{
device->perf_fd = -1;
}
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
struct drm_i915_perf_open_param param;
int p = 0, stream_fd;
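/* Properties are (key, value) pairs, hence num_properties = p / 2 below. */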
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
properties[p++] = true;
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = device->info->ver >= 8 ?
I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
I915_OA_FORMAT_A45_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
properties[p++] = device->context_id;
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
properties[p++] = true;
/* If global SSEU is available, pin it to the default. This ensures that on
* Gfx11, for instance, we use the full EU array; initially, when perf was
* enabled, we would use only half of it because of functional requirements.
*
* Temporarily disable this option on Gfx12.5+, the kernel doesn't appear to
* support it.
*/
if (intel_perf_has_global_sseu(device->physical->perf) &&
device->info->verx10 < 125) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
}
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
param.properties_ptr = (uintptr_t)properties;
param.num_properties = p / 2;
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
return stream_fd;
}
/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
VkDevice _device,
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->perf)
return VK_ERROR_EXTENSION_NOT_PRESENT;
/* Not much to do here */
return VK_SUCCESS;
}
VkResult anv_GetPerformanceParameterINTEL(
VkDevice _device,
VkPerformanceParameterTypeINTEL parameter,
VkPerformanceValueINTEL* pValue)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->perf)
return VK_ERROR_EXTENSION_NOT_PRESENT;
VkResult result = VK_SUCCESS;
switch (parameter) {
case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
pValue->data.valueBool = VK_TRUE;
break;
case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
pValue->data.value32 = 25;
break;
default:
result = VK_ERROR_FEATURE_NOT_PRESENT;
break;
}
return result;
}
VkResult anv_CmdSetPerformanceMarkerINTEL(
VkCommandBuffer commandBuffer,
const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
return VK_SUCCESS;
}
VkResult anv_AcquirePerformanceConfigurationINTEL(
VkDevice _device,
const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
VkPerformanceConfigurationINTEL* pConfiguration)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_performance_configuration_intel *config;
config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
if (!config)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
config->register_config =
intel_perf_load_configuration(device->physical->perf, device->fd,
INTEL_PERF_QUERY_GUID_MDAPI);
if (!config->register_config) {
vk_object_free(&device->vk, NULL, config);
return VK_INCOMPLETE;
}
int ret =
intel_perf_store_configuration(device->physical->perf, device->fd,
config->register_config, NULL /* guid */);
if (ret < 0) {
ralloc_free(config->register_config);
vk_object_free(&device->vk, NULL, config);
return VK_INCOMPLETE;
}
config->config_id = ret;
}
*pConfiguration = anv_performance_configuration_intel_to_handle(config);
return VK_SUCCESS;
}
VkResult anv_ReleasePerformanceConfigurationINTEL(
VkDevice _device,
VkPerformanceConfigurationINTEL _configuration)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
ralloc_free(config->register_config);
vk_object_free(&device->vk, NULL, config);
return VK_SUCCESS;
}
VkResult anv_QueueSetPerformanceConfigurationINTEL(
VkQueue _queue,
VkPerformanceConfigurationINTEL _configuration)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
struct anv_device *device = queue->device;
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (device->perf_fd < 0) {
device->perf_fd = anv_device_perf_open(device, config->config_id);
if (device->perf_fd < 0)
return VK_ERROR_INITIALIZATION_FAILED;
} else {
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) config->config_id);
if (ret < 0)
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
}
}
return VK_SUCCESS;
}
void anv_UninitializePerformanceApiINTEL(
VkDevice _device)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (device->perf_fd >= 0) {
close(device->perf_fd);
device->perf_fd = -1;
}
}
/* VK_KHR_performance_query */
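/* Translation tables from the intel_perf enums to their Vulkan equivalents;
* units without a direct Vulkan counterpart fall back to GENERIC.
*/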
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
[INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
[INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
[INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
[INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
[INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
[INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};
static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
[INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
uint32_t* pCounterCount,
VkPerformanceCounterKHR* pCounters,
VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
struct intel_perf_config *perf = pdevice->perf;
uint32_t desc_count = *pCounterCount;
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
pCounterDescriptions, &desc_count);
/* We cannot support performance queries on anything other than RCS,
* because the MI_REPORT_PERF_COUNT command is not available on other
* engines.
*/
struct anv_queue_family *queue_family =
&pdevice->queue.families[queueFamilyIndex];
if (queue_family->engine_class != I915_ENGINE_CLASS_RENDER)
return vk_outarray_status(&out);
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
unsigned char sha1_result[20];
_mesa_sha1_compute(intel_counter->symbol_name,
strlen(intel_counter->symbol_name),
sha1_result);
memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
}
vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
desc->flags = 0; /* None so far. */
snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
}
}
return vk_outarray_status(&out);
}
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
VkPhysicalDevice physicalDevice,
const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
uint32_t* pNumPasses)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
struct intel_perf_config *perf = pdevice->perf;
if (!perf) {
*pNumPasses = 0;
return;
}
*pNumPasses = intel_perf_get_n_passes(perf,
pPerformanceQueryCreateInfo->pCounterIndices,
pPerformanceQueryCreateInfo->counterIndexCount,
NULL);
}
VkResult anv_AcquireProfilingLockKHR(
VkDevice _device,
const VkAcquireProfilingLockInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct intel_perf_config *perf = device->physical->perf;
struct intel_perf_query_info *first_metric_set = &perf->queries[0];
int fd = -1;
assert(device->perf_fd == -1);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
if (fd < 0)
return VK_TIMEOUT;
}
device->perf_fd = fd;
return VK_SUCCESS;
}
void anv_ReleaseProfilingLockKHR(
VkDevice _device)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
assert(device->perf_fd >= 0);
close(device->perf_fd);
}
device->perf_fd = -1;
}
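/* Convert the accumulated results of one query pass into the
* VkPerformanceCounterResultKHR layout expected by the application.
*/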
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
struct anv_query_pool *pool, uint32_t pass,
const struct intel_perf_query_result *accumulated_results,
union VkPerformanceCounterResultKHR *results)
{
for (uint32_t c = 0; c < pool->n_counters; c++) {
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
if (counter_pass->pass != pass)
continue;
switch (pool->pass_query[pass]->kind) {
case INTEL_PERF_QUERY_TYPE_PIPELINE: {
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
results[c].uint64 = accumulated_results->accumulator[accu_offset];
break;
}
case INTEL_PERF_QUERY_TYPE_OA:
case INTEL_PERF_QUERY_TYPE_RAW:
switch (counter_pass->counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
results[c].uint64 =
counter_pass->counter->oa_counter_read_uint64(perf,
counter_pass->query,
accumulated_results);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
results[c].float32 =
counter_pass->counter->oa_counter_read_float(perf,
counter_pass->query,
accumulated_results);
break;
default:
/* So far we aren't using uint32, double or bool32... */
unreachable("unexpected counter data type");
}
break;
default:
unreachable("invalid query type");
}
/* The Vulkan extension only has nanoseconds as a unit */
if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
results[c].uint64 *= 1000;
}
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,380 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/blob.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "nir/nir_serialize.h"
#include "anv_private.h"
#include "nir/nir_xfb_info.h"
#include "vulkan/util/vk_util.h"
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob);
static void
anv_shader_bin_destroy(struct vk_pipeline_cache_object *object)
{
struct anv_device *device =
container_of(object->device, struct anv_device, vk);
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_pipeline_cache_object_finish(&shader->base);
vk_free(&device->vk.alloc, shader);
}
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
.serialize = anv_shader_bin_serialize,
.deserialize = anv_shader_bin_deserialize,
.destroy = anv_shader_bin_destroy,
};
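/* NULL-terminated list of cache object ops, presumably handed to the common
* vk_pipeline_cache code so it can recognize this driver's objects when
* importing cache data.
*/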
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
&anv_shader_bin_ops,
NULL
};
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_in,
uint32_t prog_data_size,
const struct brw_compile_stats *stats, uint32_t num_stats,
const nir_xfb_info *xfb_info_in,
const struct anv_pipeline_bind_map *bind_map)
{
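/* The shader_bin and all of its trailing data (cache key, prog_data, relocs,
* xfb info, bind map tables) are carved out of a single allocation.
*/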
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
prog_data_size);
VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
prog_data_in->num_relocs);
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
xfb_info_in == NULL ? 0 :
nir_xfb_info_size(xfb_info_in->output_count));
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
bind_map->surface_count);
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
bind_map->sampler_count);
if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&device->vk, &shader->base,
&anv_shader_bin_ops, obj_key_data, key_size);
shader->stage = stage;
shader->kernel =
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
memcpy(shader->kernel.map, kernel_data, kernel_size);
shader->kernel_size = kernel_size;
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
prog_data_in->const_data_offset;
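/* Now that the kernel's final location in the instruction state pool is
* known, patch the absolute addresses the compiler left as relocations.
*/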
int rv_count = 0;
struct brw_shader_reloc_value reloc_values[5];
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = shader_data_addr >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
.value = shader->kernel.offset,
};
if (brw_shader_stage_is_bindless(stage)) {
const struct brw_bs_prog_data *bs_prog_data =
brw_bs_prog_data_const(prog_data_in);
uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
bs_prog_data->resume_sbt_offset;
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
.value = resume_sbt_addr,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
.value = resume_sbt_addr >> 32,
};
}
brw_write_shader_relocs(&device->physical->compiler->isa,
shader->kernel.map, prog_data_in,
reloc_values, rv_count);
memcpy(prog_data, prog_data_in, prog_data_size);
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
prog_data_in->num_relocs);
prog_data->relocs = prog_data_relocs;
memset(prog_data_param, 0,
prog_data->nr_params * sizeof(*prog_data_param));
prog_data->param = prog_data_param;
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;
assert(num_stats <= ARRAY_SIZE(shader->stats));
typed_memcpy(shader->stats, stats, num_stats);
shader->num_stats = num_stats;
if (xfb_info_in) {
*xfb_info = *xfb_info_in;
typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
xfb_info_in->output_count);
shader->xfb_info = xfb_info;
} else {
shader->xfb_info = NULL;
}
shader->bind_map = *bind_map;
typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
bind_map->surface_count);
shader->bind_map.surface_to_descriptor = surface_to_descriptor;
typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
bind_map->sampler_count);
shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
return shader;
}
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
blob_write_uint32(blob, shader->stage);
blob_write_uint32(blob, shader->kernel_size);
blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
blob_write_uint32(blob, shader->prog_data_size);
blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
blob_write_bytes(blob, shader->prog_data->relocs,
shader->prog_data->num_relocs *
sizeof(shader->prog_data->relocs[0]));
blob_write_uint32(blob, shader->num_stats);
blob_write_bytes(blob, shader->stats,
shader->num_stats * sizeof(shader->stats[0]));
if (shader->xfb_info) {
uint32_t xfb_info_size =
nir_xfb_info_size(shader->xfb_info->output_count);
blob_write_uint32(blob, xfb_info_size);
blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
} else {
blob_write_uint32(blob, 0);
}
blob_write_bytes(blob, shader->bind_map.surface_sha1,
sizeof(shader->bind_map.surface_sha1));
blob_write_bytes(blob, shader->bind_map.sampler_sha1,
sizeof(shader->bind_map.sampler_sha1));
blob_write_bytes(blob, shader->bind_map.push_sha1,
sizeof(shader->bind_map.push_sha1));
blob_write_uint32(blob, shader->bind_map.surface_count);
blob_write_uint32(blob, shader->bind_map.sampler_count);
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
shader->bind_map.surface_count *
sizeof(*shader->bind_map.surface_to_descriptor));
blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
shader->bind_map.sampler_count *
sizeof(*shader->bind_map.sampler_to_descriptor));
blob_write_bytes(blob, shader->bind_map.push_ranges,
sizeof(shader->bind_map.push_ranges));
return !blob->out_of_memory;
}
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *vk_device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
struct anv_device *device =
container_of(vk_device, struct anv_device, vk);
gl_shader_stage stage = blob_read_uint32(blob);
uint32_t kernel_size = blob_read_uint32(blob);
const void *kernel_data = blob_read_bytes(blob, kernel_size);
uint32_t prog_data_size = blob_read_uint32(blob);
const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
if (blob->overrun)
return NULL;
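/* prog_data_size depends on the shader stage and may be smaller than the
* union, so only copy what was actually serialized.
*/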
union brw_any_prog_data prog_data;
memcpy(&prog_data, prog_data_bytes,
MIN2(sizeof(prog_data), prog_data_size));
prog_data.base.relocs =
blob_read_bytes(blob, prog_data.base.num_relocs *
sizeof(prog_data.base.relocs[0]));
uint32_t num_stats = blob_read_uint32(blob);
const struct brw_compile_stats *stats =
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
const nir_xfb_info *xfb_info = NULL;
uint32_t xfb_size = blob_read_uint32(blob);
if (xfb_size)
xfb_info = blob_read_bytes(blob, xfb_size);
struct anv_pipeline_bind_map bind_map;
blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
bind_map.surface_count = blob_read_uint32(blob);
bind_map.sampler_count = blob_read_uint32(blob);
bind_map.surface_to_descriptor = (void *)
blob_read_bytes(blob, bind_map.surface_count *
sizeof(*bind_map.surface_to_descriptor));
bind_map.sampler_to_descriptor = (void *)
blob_read_bytes(blob, bind_map.sampler_count *
sizeof(*bind_map.sampler_to_descriptor));
blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));
if (blob->overrun)
return NULL;
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
&prog_data.base, prog_data_size,
stats, num_stats, xfb_info, &bind_map);
if (shader == NULL)
return NULL;
return &shader->base;
}
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
struct vk_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
bool *user_cache_hit)
{
/* Use the default pipeline cache if none is specified */
if (cache == NULL)
cache = device->default_pipeline_cache;
bool cache_hit = false;
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&anv_shader_bin_ops, &cache_hit);
if (user_cache_hit != NULL) {
*user_cache_hit = object != NULL && cache_hit &&
cache != device->default_pipeline_cache;
}
if (object == NULL)
return NULL;
return container_of(object, struct anv_shader_bin, base);
}
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct brw_compile_stats *stats,
uint32_t num_stats,
const nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map)
{
/* Use the default pipeline cache if none is specified */
if (cache == NULL)
cache = device->default_pipeline_cache;
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
stats, num_stats,
xfb_info, bind_map);
if (shader == NULL)
return NULL;
struct vk_pipeline_cache_object *cached =
vk_pipeline_cache_add_object(cache, &shader->base);
return container_of(cached, struct anv_shader_bin, base);
}
#define SHA1_KEY_SIZE 20
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
struct vk_pipeline_cache *cache,
const nir_shader_compiler_options *nir_options,
unsigned char sha1_key[SHA1_KEY_SIZE],
void *mem_ctx)
{
if (cache == NULL)
cache = device->default_pipeline_cache;
return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
nir_options, NULL, mem_ctx);
}
void
anv_device_upload_nir(struct anv_device *device,
struct vk_pipeline_cache *cache,
const struct nir_shader *nir,
unsigned char sha1_key[SHA1_KEY_SIZE])
{
if (cache == NULL)
cache = device->default_pipeline_cache;
vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,75 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* This file implements VkQueue
*/
#include "anv_private.h"
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue,
uint32_t exec_flags,
const VkDeviceQueueCreateInfo *pCreateInfo,
uint32_t index_in_family)
{
struct anv_physical_device *pdevice = device->physical;
VkResult result;
result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo,
index_in_family);
if (result != VK_SUCCESS)
return result;
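/* With INTEL_DEBUG=sync, create a per-queue sync object; the submission code
* presumably waits on it after each submit so submissions become synchronous.
*/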
if (INTEL_DEBUG(DEBUG_SYNC)) {
result = vk_sync_create(&device->vk,
&device->physical->sync_syncobj_type,
0, 0, &queue->sync);
if (result != VK_SUCCESS) {
vk_queue_finish(&queue->vk);
return result;
}
}
queue->vk.driver_submit = anv_queue_submit;
queue->device = device;
assert(queue->vk.queue_family_index < pdevice->queue.family_count);
queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
queue->index_in_family = index_in_family;
queue->exec_flags = exec_flags;
return VK_SUCCESS;
}
void
anv_queue_finish(struct anv_queue *queue)
{
if (queue->sync)
vk_sync_destroy(&queue->device->vk, queue->sync);
vk_queue_finish(&queue->vk);
}

View File

@ -0,0 +1,92 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include "anv_private.h"
#include "vk_enum_to_str.h"
void
__anv_perf_warn(struct anv_device *device,
const struct vk_object_base *object,
const char *file, int line, const char *format, ...)
{
va_list ap;
char buffer[256];
va_start(ap, format);
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
if (object) {
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
VK_LOG_OBJS(object), file, line,
"PERF: %s", buffer);
} else {
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
VK_LOG_NO_OBJS(device->physical->instance), file, line,
"PERF: %s", buffer);
}
}
void
anv_dump_pipe_bits(enum anv_pipe_bits bits)
{
if (bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT)
fputs("+depth_flush ", stderr);
if (bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT)
fputs("+dc_flush ", stderr);
if (bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)
fputs("+hdc_flush ", stderr);
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
fputs("+rt_flush ", stderr);
if (bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
fputs("+tile_flush ", stderr);
if (bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT)
fputs("+state_inval ", stderr);
if (bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT)
fputs("+const_inval ", stderr);
if (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
fputs("+vf_inval ", stderr);
if (bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT)
fputs("+tex_inval ", stderr);
if (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
fputs("+ic_inval ", stderr);
if (bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT)
fputs("+pb_stall ", stderr);
if (bits & ANV_PIPE_PSS_STALL_SYNC_BIT)
fputs("+pss_stall ", stderr);
if (bits & ANV_PIPE_DEPTH_STALL_BIT)
fputs("+depth_stall ", stderr);
if (bits & ANV_PIPE_CS_STALL_BIT)
fputs("+cs_stall ", stderr);
if (bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)
fputs("+utdp_flush", stderr);
}

View File

@ -0,0 +1,346 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "perf/intel_perf.h"
static uint32_t
command_buffers_count_utraces(struct anv_device *device,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t *utrace_copies)
{
if (!u_trace_context_actively_tracing(&device->ds.trace_context))
return 0;
uint32_t utraces = 0;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (u_trace_has_points(&cmd_buffers[i]->trace)) {
utraces++;
if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
*utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
}
}
return utraces;
}
static void
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_flush_copy *flush = flush_data;
intel_ds_flush_data_fini(&flush->ds);
if (flush->trace_bo) {
assert(flush->batch_bo);
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
anv_device_release_bo(device, flush->batch_bo);
anv_device_release_bo(device, flush->trace_bo);
}
vk_sync_destroy(&device->vk, flush->sync);
vk_free(&device->vk.alloc, flush);
}
static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_flush_copy *flush = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
to_addr, from_addr, count * sizeof(uint64_t));
}
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
struct anv_utrace_flush_copy **out_flush_data)
{
struct anv_device *device = queue->device;
uint32_t utrace_copies = 0;
uint32_t utraces = command_buffers_count_utraces(device,
cmd_buffer_count,
cmd_buffers,
&utrace_copies);
if (!utraces) {
*out_flush_data = NULL;
return VK_SUCCESS;
}
VkResult result;
struct anv_utrace_flush_copy *flush =
vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!flush)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &flush->sync);
if (result != VK_SUCCESS)
goto error_sync;
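/* Command buffers that are not ONE_TIME_SUBMIT may be reused or reset before
* the trace is processed, so their timestamps get copied into a dedicated
* buffer by the small SO-memcpy batch emitted below.
*/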
if (utrace_copies > 0) {
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
utrace_copies * 4096,
&flush->trace_bo);
if (result != VK_SUCCESS)
goto error_trace_buf;
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
/* 128 dwords of setup + 64 dwords per copy */
align_u32(512 + 64 * utrace_copies, 4096),
&flush->batch_bo);
if (result != VK_SUCCESS)
goto error_batch_buf;
result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
if (result != VK_SUCCESS)
goto error_reloc_list;
flush->batch.alloc = &device->vk.alloc;
flush->batch.relocs = &flush->relocs;
anv_batch_set_storage(&flush->batch,
(struct anv_address) { .bo = flush->batch_bo, },
flush->batch_bo->map, flush->batch_bo->size);
/* Emit the copies */
anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state,
device,
&flush->batch);
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
u_trace_flush(&cmd_buffers[i]->trace, flush, false);
} else {
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&flush->ds.trace,
flush,
anv_device_utrace_emit_copy_ts_buffer);
}
}
anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
u_trace_flush(&flush->ds.trace, flush, true);
if (flush->batch.status != VK_SUCCESS) {
result = flush->batch.status;
goto error_batch;
}
} else {
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
}
}
flush->queue = queue;
*out_flush_data = flush;
return VK_SUCCESS;
error_batch:
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
error_reloc_list:
anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
error_batch_buf:
anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
error_trace_buf:
vk_sync_destroy(&device->vk, flush->sync);
error_sync:
vk_free(&device->vk.alloc, flush);
return result;
}
static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = NULL;
UNUSED VkResult result =
anv_bo_pool_alloc(&device->utrace_bo_pool,
align_u32(size_b, 4096),
&bo);
assert(result == VK_SUCCESS);
return bo;
}
static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = timestamps;
anv_bo_pool_free(&device->utrace_bo_pool, bo);
}
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(ut, struct anv_cmd_buffer, trace);
struct anv_device *device = cmd_buffer->device;
struct anv_bo *bo = timestamps;
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
(struct anv_address) {
.bo = bo,
.offset = idx * sizeof(uint64_t) },
end_of_pipe);
}
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = timestamps;
struct anv_utrace_flush_copy *flush = flush_data;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
UNUSED VkResult result =
vk_sync_wait(&device->vk,
flush->sync,
0,
VK_SYNC_WAIT_COMPLETE,
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
assert(result == VK_SUCCESS);
}
uint64_t *ts = bo->map;
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
return intel_device_info_timebase_scale(device->info, ts[idx]);
}
static const char *
queue_family_to_name(const struct anv_queue_family *family)
{
switch (family->engine_class) {
case I915_ENGINE_CLASS_RENDER:
return "render";
case I915_ENGINE_CLASS_COPY:
return "copy";
case I915_ENGINE_CLASS_VIDEO:
return "video";
case I915_ENGINE_CLASS_VIDEO_ENHANCE:
return "video-enh";
default:
return "unknown";
}
}
void
anv_device_utrace_init(struct anv_device *device)
{
anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
intel_ds_device_init(&device->ds, device->info, device->fd,
device->physical->local_minor - 128,
INTEL_DS_API_VULKAN);
u_trace_context_init(&device->ds.trace_context,
&device->ds,
anv_utrace_create_ts_buffer,
anv_utrace_destroy_ts_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
anv_utrace_delete_flush_data);
for (uint32_t q = 0; q < device->queue_count; q++) {
struct anv_queue *queue = &device->queues[q];
queue->ds =
intel_ds_device_add_queue(&device->ds, "%s%u",
queue_family_to_name(queue->family),
queue->index_in_family);
}
}
void
anv_device_utrace_finish(struct anv_device *device)
{
u_trace_context_process(&device->ds.trace_context, true);
intel_ds_device_fini(&device->ds);
anv_bo_pool_finish(&device->utrace_bo_pool);
}
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
{
static const struct {
enum anv_pipe_bits anv;
enum intel_ds_stall_flag ds;
} anv_to_ds_flags[] = {
{ .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
{ .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
{ .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
};
enum intel_ds_stall_flag ret = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
if (anv_to_ds_flags[i].anv & bits)
ret |= anv_to_ds_flags[i].ds;
}
return ret;
}

View File

@ -0,0 +1,118 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "anv_measure.h"
#include "wsi_common.h"
#include "vk_fence.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_util.h"
static PFN_vkVoidFunction
anv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
}
VkResult
anv_init_wsi(struct anv_physical_device *physical_device)
{
VkResult result;
result = wsi_device_init(&physical_device->wsi_device,
anv_physical_device_to_handle(physical_device),
anv_wsi_proc_addr,
&physical_device->instance->vk.alloc,
physical_device->master_fd,
&physical_device->instance->dri_options,
false);
if (result != VK_SUCCESS)
return result;
physical_device->wsi_device.supports_modifiers = true;
physical_device->wsi_device.signal_semaphore_with_memory = true;
physical_device->wsi_device.signal_fence_with_memory = true;
physical_device->vk.wsi_device = &physical_device->wsi_device;
wsi_device_setup_syncobj_fd(&physical_device->wsi_device,
physical_device->local_fd);
return VK_SUCCESS;
}
void
anv_finish_wsi(struct anv_physical_device *physical_device)
{
physical_device->vk.wsi_device = NULL;
wsi_device_finish(&physical_device->wsi_device,
&physical_device->instance->vk.alloc);
}
VkResult anv_AcquireNextImage2KHR(
VkDevice _device,
const VkAcquireNextImageInfoKHR *pAcquireInfo,
uint32_t *pImageIndex)
{
VK_FROM_HANDLE(anv_device, device, _device);
VkResult result =
wsi_common_acquire_next_image2(&device->physical->wsi_device,
_device, pAcquireInfo, pImageIndex);
if (result == VK_SUCCESS)
anv_measure_acquire(device);
return result;
}
VkResult anv_QueuePresentKHR(
VkQueue _queue,
const VkPresentInfoKHR* pPresentInfo)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
struct anv_device *device = queue->device;
VkResult result;
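/* Bump the per-frame debug counter and flush the cacheline on platforms that
* need explicit flushes so the updated value is visible in memory.
*/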
if (device->debug_frame_desc) {
device->debug_frame_desc->frame_id++;
if (device->physical->memory.need_clflush) {
intel_clflush_range(device->debug_frame_desc,
sizeof(*device->debug_frame_desc));
}
}
result = vk_queue_wait_before_present(&queue->vk, pPresentInfo);
if (result != VK_SUCCESS)
return result;
result = wsi_common_queue_present(&device->physical->wsi_device,
anv_device_to_handle(queue->device),
_queue, 0,
pPresentInfo);
u_trace_context_process(&device->ds.trace_context, true);
return result;
}

View File

@ -0,0 +1,410 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include "anv_private.h"
#include "anv_measure.h"
/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address
#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"
#include "ds/intel_tracepoints.h"
static void blorp_measure_start(struct blorp_batch *_batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
trace_intel_begin_blorp(&cmd_buffer->trace);
anv_measure_snapshot(cmd_buffer,
params->snapshot_type,
NULL, 0);
}
static void blorp_measure_end(struct blorp_batch *_batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
trace_intel_end_blorp(&cmd_buffer->trace,
params->x1 - params->x0,
params->y1 - params->y0,
params->hiz_op,
params->fast_clear_op,
params->shader_type,
params->shader_pipeline);
}
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
void *location, struct blorp_address address, uint32_t delta)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->batch.start <= location &&
location < cmd_buffer->batch.end);
return anv_batch_emit_reloc(&cmd_buffer->batch, location,
address.buffer, address.offset + delta);
}
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
VkResult result;
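   /* With softpin, surface addresses are final: we only need to track the BO
    * so it ends up in the execbuf. Otherwise, record a relocation and write
    * the presumed address into the surface state below.
    */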
if (ANV_ALWAYS_SOFTPIN) {
result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
&cmd_buffer->vk.pool->alloc,
address.buffer);
if (unlikely(result != VK_SUCCESS))
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
uint64_t address_u64 = 0;
result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
&cmd_buffer->vk.pool->alloc,
ss_offset, address.buffer,
address.offset + delta,
&address_u64);
if (result != VK_SUCCESS)
anv_batch_set_error(&cmd_buffer->batch, result);
void *dest = anv_block_pool_map(
&cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
write_reloc(cmd_buffer->device, dest, address_u64, false);
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address address)
{
if (ANV_ALWAYS_SOFTPIN) {
struct anv_address anv_addr = {
.bo = address.buffer,
.offset = address.offset,
};
return anv_address_physical(anv_addr);
} else {
/* We'll let blorp_surface_reloc write the address. */
return 0;
}
}
#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return (struct blorp_address) {
.buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
.offset = 0,
};
}
#endif
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
*offset = state.offset;
return state.map;
}
UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state state =
anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
alignment);
*offset = state.offset;
return state.map;
}
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
unsigned state_size, unsigned state_alignment,
uint32_t *bt_offset,
uint32_t *surface_offsets, void **surface_maps)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
uint32_t state_offset;
struct anv_state bt_state;
VkResult result =
anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
&state_offset, &bt_state);
if (result != VK_SUCCESS)
return;
uint32_t *bt_map = bt_state.map;
*bt_offset = bt_state.offset;
for (unsigned i = 0; i < num_entries; i++) {
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
bt_map[i] = surface_state.offset + state_offset;
surface_offsets[i] = surface_state.offset;
surface_maps[i] = surface_state.map;
}
}
static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
uint32_t offset)
{
return offset;
}
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
struct blorp_address *addr)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state vb_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);
*addr = (struct blorp_address) {
.buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = vb_state.offset,
.mocs = isl_mocs(&cmd_buffer->device->isl_dev,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
};
return vb_state.map;
}
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
const struct blorp_address *addrs,
uint32_t *sizes,
unsigned num_vbs)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
for (unsigned i = 0; i < num_vbs; i++) {
struct anv_address anv_addr = {
.bo = addrs[i].buffer,
.offset = addrs[i].offset,
};
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
i, anv_addr, sizes[i]);
}
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
/* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
* really matter for blorp because we never call apply_pipe_flushes after
* this point.
*/
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
(1 << num_vbs) - 1);
}
UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return (struct blorp_address) {
.buffer = cmd_buffer->device->workaround_address.bo,
.offset = cmd_buffer->device->workaround_address.offset,
};
}
static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
/* We don't need to flush states anymore, since everything will be snooped.
*/
}
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return cmd_buffer->state.current_l3_config;
}
static void
blorp_exec_on_render(struct blorp_batch *batch,
const struct blorp_params *params)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);
const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
params->y1 - params->y0, scale);
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
* Target Cache Flush by enabling this bit. When render target flush
* is set due to new association of BTI, PS Scoreboard Stall bit must
* be set in this packet."
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"before blorp BTI change");
#endif
if (params->depth.enabled &&
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
genX(flush_pipeline_select_3d)(cmd_buffer);
   /* Apply any outstanding flushes in case the pipeline select above didn't. */
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);
/* BLORP doesn't do anything fancy with depth such as discards, so we want
* the PMA fix off. Also, off is always the safe option.
*/
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
blorp_exec(batch, params);
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
* Target Cache Flush by enabling this bit. When render target flush
* is set due to new association of BTI, PS Scoreboard Stall bit must
* be set in this packet."
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"after blorp BTI change");
#endif
/* Calculate state that does not get touched by blorp.
* Flush everything else.
*/
anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
BITSET_ONES(dyn_dirty);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
if (!params->wm_prog_data) {
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
}
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty;
BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}
static void
blorp_exec_on_compute(struct blorp_batch *batch,
const struct blorp_params *params)
{
assert(batch->flags & BLORP_BATCH_USE_COMPUTE);
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
   /* Apply any outstanding flushes in case the pipeline select above didn't. */
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
blorp_exec(batch, params);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
void
genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
if (!cmd_buffer->state.current_l3_config) {
const struct intel_l3_config *cfg =
intel_get_default_l3_config(cmd_buffer->device->info);
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
}
#if GFX_VER == 7
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
* indirect fast-clear colors can cause GPU hangs if we don't stall first.
* See genX(cmd_buffer_mi_memcpy) for more details.
*/
if (params->src.clear_color_addr.buffer ||
params->dst.clear_color_addr.buffer) {
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
"before blorp prep fast clear");
}
#endif
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
blorp_exec_on_compute(batch, params);
else
blorp_exec_on_render(batch, params);
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,324 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_l3_config.h"
/**
* This file implements some lightweight memcpy/memset operations on the GPU
* using a vertex buffer and streamout.
*/
/**
* Returns the greatest common divisor of a and b that is a power of two.
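 *
 * For example, gcd_pow2_u64(16, 24) returns 8: 16 = 2^4 and 24 = 2^3 * 3, so
 * the largest power of two dividing both is 2^3.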
*/
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
assert(a > 0 || b > 0);
unsigned a_log2 = ffsll(a) - 1;
unsigned b_log2 = ffsll(b) - 1;
/* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
* case, the MIN2() will take the other one. If both are 0 then we will
* hit the assert above.
*/
return 1 << MIN2(a_log2, b_log2);
}
static void
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
const struct intel_l3_config *l3_config)
{
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = false;
vfi.VertexElementIndex = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
#endif
/* Disable all shader stages */
   anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
   anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
   anv_batch_emit(batch, GENX(3DSTATE_TE), te);
   anv_batch_emit(batch, GENX(3DSTATE_DS), ds);
   anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
   anv_batch_emit(batch, GENX(3DSTATE_PS), ps);
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
sbe.VertexURBEntryReadOffset = 1;
sbe.NumberofSFOutputAttributes = 1;
sbe.VertexURBEntryReadLength = 1;
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadLength = true;
sbe.ForceVertexURBEntryReadOffset = true;
#endif
#if GFX_VER >= 9
for (unsigned i = 0; i < 32; i++)
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
}
/* Emit URB setup. We tell it that the VS is active because we want it to
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
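    *
    * The entry sizes below all work out to 1 (DIV_ROUND_UP(32, 64) == 1).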
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
#if GFX_VER >= 12
/* Disable Primitive Replication. */
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
}
#endif
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
vf.StatisticsEnable = false;
}
}
static void
emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
   /* The copy is done in blocks of at most four 32-bit components (16 bytes)
    * at a time.
    */
assert(size % 4 == 0);
unsigned bs = gcd_pow2_u64(16, size);
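   /* For example, a 24-byte copy yields bs == 8 (R32G32_UINT, 3 vertices),
    * while any 16-byte-aligned size uses the full R32G32B32A32_UINT block.
    */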
enum isl_format format;
switch (bs) {
case 4: format = ISL_FORMAT_R32_UINT; break;
case 8: format = ISL_FORMAT_R32G32_UINT; break;
case 16: format = ISL_FORMAT_R32G32B32A32_UINT; break;
default:
unreachable("Invalid size");
}
uint32_t *dw;
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_VERTEX_BUFFERS));
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1,
&(struct GENX(VERTEX_BUFFER_STATE)) {
         .VertexBufferIndex = 32, /* Index 32 is reserved for this internal copy path */
.AddressModifyEnable = true,
.BufferStartingAddress = src,
.BufferPitch = bs,
.MOCS = anv_mocs(device, src.bo, 0),
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
#if (GFX_VER >= 8)
.BufferSize = size,
#else
.EndAddress = anv_address_add(src, size - 1),
#endif
});
dw = anv_batch_emitn(batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw + 1,
&(struct GENX(VERTEX_ELEMENT_STATE)) {
.VertexBufferIndex = 32,
.Valid = true,
.SourceElementFormat = format,
.SourceElementOffset = 0,
.Component0Control = (bs >= 4) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component1Control = (bs >= 8) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component2Control = (bs >= 12) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component3Control = (bs >= 16) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
});
anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) {
#if GFX_VER < 12
sob.SOBufferIndex = 0;
#else
sob._3DCommandOpcode = 0;
sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD;
#endif
      sob.MOCS = anv_mocs(device, dst.bo, 0);
sob.SurfaceBaseAddress = dst;
#if GFX_VER >= 8
sob.SOBufferEnable = true;
sob.SurfaceSize = size / 4 - 1;
#else
sob.SurfacePitch = bs;
sob.SurfaceEndAddress = anv_address_add(dst, size);
#endif
#if GFX_VER >= 8
/* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
* the end position of the stream. We need to reset this value to 0 at
* the beginning of the run or else SOL will start at the offset from
* the previous draw.
*/
sob.StreamOffsetWriteEnable = true;
sob.StreamOffset = 0;
#endif
}
#if GFX_VER <= 7
/* The hardware can do this for us on BDW+ (see above) */
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), load) {
load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
load.DataDWord = 0;
}
#endif
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
.StreamtoBufferSelects0 = (1 << 0),
.NumEntries0 = 1);
GENX(SO_DECL_ENTRY_pack)(batch, dw + 3,
&(struct GENX(SO_DECL_ENTRY)) {
.Stream0Decl = {
.OutputBufferSlot = 0,
.RegisterIndex = 0,
.ComponentMask = (1 << (bs / 4)) - 1,
},
});
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) {
so.SOFunctionEnable = true;
so.RenderingDisable = true;
so.Stream0VertexReadOffset = 0;
so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
#if GFX_VER >= 8
so.Buffer0SurfacePitch = bs;
#else
so.SOBufferEnable0 = true;
#endif
}
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
prim.VertexCountPerInstance = size / bs;
prim.StartVertexLocation = 0;
prim.InstanceCount = 1;
prim.StartInstanceLocation = 0;
prim.BaseVertexLocation = 0;
}
}
void
genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_device *device,
struct anv_batch *batch)
{
memset(state, 0, sizeof(*state));
state->batch = batch;
state->device = device;
const struct intel_l3_config *cfg = intel_get_default_l3_config(device->info);
genX(emit_l3_config)(batch, device, cfg);
anv_batch_emit(batch, GENX(PIPELINE_SELECT), ps) {
#if GFX_VER >= 9
ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
#endif
ps.PipelineSelection = _3D;
}
emit_common_so_memcpy(batch, device, cfg);
}
void
genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
{
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
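   /* Pad with a NOOP if the batch length is not a multiple of 8 bytes. */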
if ((state->batch->next - state->batch->start) & 4)
anv_batch_emit(state->batch, GENX(MI_NOOP), noop);
}
void
genX(emit_so_memcpy)(struct anv_memcpy_state *state,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
if (GFX_VER >= 8 && GFX_VER <= 9 &&
!anv_use_relocations(state->device->physical) &&
anv_gfx8_9_vb_cache_range_needs_workaround(&state->vb_bound,
&state->vb_dirty,
src, size)) {
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_VF_CACHE_INVALIDATE_BIT);
memset(&state->vb_dirty, 0, sizeof(state->vb_dirty));
}
emit_so_memcpy(state->batch, state->device, dst, src, size);
}
void
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
if (size == 0)
return;
if (!cmd_buffer->state.current_l3_config) {
const struct intel_l3_config *cfg =
intel_get_default_l3_config(cmd_buffer->device->info);
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
}
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 32, src, size);
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genX(flush_pipeline_select_3d)(cmd_buffer);
emit_common_so_memcpy(&cmd_buffer->batch, cmd_buffer->device,
cmd_buffer->state.current_l3_config);
emit_so_memcpy(&cmd_buffer->batch, cmd_buffer->device, dst, src, size);
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
1ull << 32);
   /* Mark the pipeline and rasterizer-discard state dirty since we touched
    * 3DSTATE_STREAMOUT.
    */
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
BITSET_SET(cmd_buffer->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,314 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "vk_format.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
static uint32_t
get_depth_format(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
switch (gfx->depth_att.vk_format) {
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D16_UNORM_S8_UINT:
return D16_UNORM;
case VK_FORMAT_X8_D24_UNORM_PACK32:
case VK_FORMAT_D24_UNORM_S8_UINT:
return D24_UNORM_X8_UINT;
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return D32_FLOAT;
default:
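      /* No depth attachment (VK_FORMAT_UNDEFINED) or an unhandled format;
       * fall back to D16_UNORM.
       */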
return D16_UNORM;
}
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
      /* Take dynamic primitive topology into account with
* 3DSTATE_SF::MultisampleRasterizationMode
*/
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
uint32_t ms_rast_mode =
genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode);
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
pipeline->line_mode);
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
.DepthBufferSurfaceFormat = get_depth_format(cmd_buffer),
.LineWidth = dyn->rs.line.width,
.AntialiasingEnable = aa_enable,
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
.MultisampleRasterizationMode = ms_rast_mode,
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
};
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx7.sf);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
}
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
ls.LineStippleInverseRepeatCount =
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
.DoubleSidedStencilEnable = true,
.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff,
.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff,
.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff,
.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff,
.DepthTestEnable = opt_ds.depth.test_enable,
.DepthBufferWriteEnable = opt_ds.depth.write_enable,
.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op],
.StencilTestEnable = opt_ds.stencil.test_enable,
.StencilBufferWriteEnable = opt_ds.stencil.write_enable,
.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail],
.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass],
.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail],
.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare],
.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail],
.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass],
.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail],
.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare],
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
struct anv_state ds_state =
anv_cmd_buffer_emit_dynamic(cmd_buffer, depth_stencil_dw,
sizeof(depth_stencil_dw), 64);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) {
dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset;
}
}
if (cmd_buffer->state.gfx.index_buffer &&
((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))) {
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
uint32_t offset = cmd_buffer->state.gfx.index_offset;
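      /* On Haswell (GFX_VERx10 == 75) the cut index lives in 3DSTATE_VF;
       * on Ivybridge it is programmed via 3DSTATE_INDEX_BUFFER below.
       */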
#if GFX_VERx10 == 75
anv_batch_emit(&cmd_buffer->batch, GFX75_3DSTATE_VF, vf) {
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
}
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GFX_VERx10 != 75
ib.CutIndexEnable = dyn->ia.primitive_restart_enable;
#endif
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferEndingAddress = anv_address_add(buffer->address,
buffer->vk.size);
}
}
   /* Re-emit 3DSTATE_WM in the hope that we can avoid spawning fragment
    * shader threads, or when dirty dynamic primitive topology state requires
    * toggling 3DSTATE_WM::MultisampleRasterizationMode dynamically.
    */
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
uint32_t dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
!anv_cmd_buffer_all_color_write_masked(cmd_buffer)),
.MultisampleRasterizationMode =
genX(ms_rasterization_mode)(pipeline,
dynamic_raster_mode),
};
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) {
const uint32_t samples = MAX2(1, cmd_buffer->state.gfx.samples);
const struct vk_sample_locations_state *sl = dyn->ms.sample_locations;
genX(emit_multisample)(&cmd_buffer->batch, samples,
sl->per_pixel == samples ? sl : NULL);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
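      /* One bit per render target; a cleared bit disables all channel writes
       * for that attachment in the BLEND_STATE entries below.
       */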
/* Blend states of each RT */
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
uint32_t *dws = blend_dws;
memset(blend_dws, 0, sizeof(blend_dws));
      /* Skip over the BLEND_STATE header; it comes from the pipeline's packed
       * state when the two are merged below.
       */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
/* Disable anything above the current number of color attachments. */
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
(color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_A_BIT) == 0,
.WriteDisableRed = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_R_BIT) == 0,
.WriteDisableGreen = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_G_BIT) == 0,
.WriteDisableBlue = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_B_BIT) == 0,
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
pipeline->gfx7.blend_state, num_dwords, 64);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = blend_states.offset;
}
}
/* When we're done, there is no more dirty gfx state. */
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer->state.gfx.dirty = 0;
}
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
bool enable)
{
/* The NP PMA fix doesn't exist on gfx7 */
}

View File

@@ -0,0 +1,706 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
if (cmd_buffer->state.pma_fix_enabled == enable)
return;
cmd_buffer->state.pma_fix_enabled = enable;
/* According to the Broadwell PIPE_CONTROL documentation, software should
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
* prior to the LRI. If stencil buffer writes are enabled, then a Render
* Cache Flush is also necessary.
*
* The Skylake docs say to use a depth stall rather than a command
* streamer stall. However, the hardware seems to violently disagree.
* A full command streamer stall seems to be needed in both cases.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthCacheFlushEnable = true;
pc.CommandStreamerStallEnable = true;
pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
pc.TileCacheFlushEnable = true;
/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
*/
pc.DepthStallEnable = true;
#endif
}
#if GFX_VER == 9
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
.STCPMAOptimizationEnable = enable,
.STCPMAOptimizationEnableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_0_num);
lri.DataDWord = cache_mode;
}
#elif GFX_VER == 8
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
.NPPMAFixEnable = enable,
.NPEarlyZFailsDisable = enable,
.NPPMAFixEnableMask = true,
.NPEarlyZFailsDisableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_1_num);
lri.DataDWord = cache_mode;
}
#endif /* GFX_VER == 8 */
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
*
* Again, the Skylake docs give a different set of flushes but the BDW
* flushes seem to work just as well.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthStallEnable = true;
pc.DepthCacheFlushEnable = true;
pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
pc.TileCacheFlushEnable = true;
#endif
}
}
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
assert(GFX_VER == 8);
/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
*
* SW must set this bit in order to enable this fix when following
* expression is TRUE.
*
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
* (3DSTATE_PS_EXTRA::PixelShaderValid) &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
if (!ds->depth.test_enable)
return false;
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
return (pipeline->kill_pixel && (ds->depth.write_enable ||
ds->stencil.write_enable)) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
if (GFX_VER > 9)
return false;
assert(GFX_VER == 9);
/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
*
* Clearing this bit will force the STC cache to wait for pending
* retirement of pixels at the HZ-read stage and do the STC-test for
* Non-promoted, R-computed and Computed depth modes instead of
* postponing the STC-test to RCPFE.
*
* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*
* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*
* COMP_STC_EN = STC_TEST_EN &&
* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
*
* SW parses the pipeline states to generate the following logical
* signal indicating if PMA FIX can be enabled.
*
* STC_PMA_OPT =
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
* !(3DSTATE_WM::EDSC_Mode == 2) &&
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (COMP_STC_EN || STC_WRITE_EN) &&
* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* We can't possibly know if HiZ is enabled without the depth attachment */
ASSERTED const struct anv_image_view *d_iview =
cmd_buffer->state.gfx.depth_att.iview;
assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == 2) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*/
const bool stc_test_en = ds->stencil.test_enable;
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*/
const bool stc_write_en = ds->stencil.write_enable;
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
/* COMP_STC_EN || STC_WRITE_EN */
if (!(comp_stc_en || stc_write_en))
return false;
/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
return pipeline->kill_pixel ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
#if GFX_VER >= 11
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
#endif /* GFX_VER >= 11 */
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
};
#if GFX_VER == 8
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
sf.CHVLineWidth = dyn->rs.line.width;
} else {
sf.LineWidth = dyn->rs.line.width;
}
#else
      sf.LineWidth = dyn->rs.line.width;
#endif
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
      /* Take dynamic primitive topology into account with
* 3DSTATE_RASTER::APIMode
* 3DSTATE_RASTER::DXMultisampleRasterizationEnable
* 3DSTATE_RASTER::AntialiasingEnable
*/
uint32_t api_mode = 0;
bool msaa_raster_enable = false;
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
genX(rasterization_mode)(dynamic_raster_mode,
pipeline->line_mode, dyn->rs.line.width,
&api_mode, &msaa_raster_enable);
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
pipeline->line_mode);
uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
.APIMode = api_mode,
.DXMultisampleRasterizationEnable = msaa_raster_enable,
.AntialiasingEnable = aa_enable,
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
};
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
pipeline->gfx8.raster);
}
/* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits get split
* across different state packets for gfx8 and gfx9. We handle that by
* using a big old #if switch here.
*/
#if GFX_VER == 8
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#else
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#endif
#if GFX_VER >= 12
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
}
}
#endif
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
ls.LineStippleInverseRepeatCount =
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
vf.GeometryDistributionEnable = true;
#endif
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
}
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
uint32_t offset = cmd_buffer->state.gfx.index_offset;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
ib.L3BypassDisable = true;
#endif
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
VK_WHOLE_SIZE);
}
}
#if GFX_VERx10 >= 125
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
         /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */
vfg.DistributionMode =
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
RR_FREE;
vfg.DistributionGranularity = BatchLevelGranularity;
/* Wa_14014890652 */
if (intel_device_info_is_dg2(cmd_buffer->device->info))
vfg.GranularityThresholdDisable = 1;
vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
/* 192 vertices for TRILIST_ADJ */
vfg.ListNBatchSizeScale = 0;
/* Batch size of 384 vertices */
vfg.List3BatchSizeScale = 2;
/* Batch size of 128 vertices */
vfg.List2BatchSizeScale = 1;
/* Batch size of 128 vertices */
vfg.List1BatchSizeScale = 2;
/* Batch size of 256 vertices for STRIP topologies */
vfg.StripBatchSizeScale = 3;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeScale = 1;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeMultiplier = 31;
}
}
#endif
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
      /* Re-emit 3DSTATE_WM in the hope that we can avoid spawning fragment
       * shader threads.
       */
uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
ForceON : 0,
};
GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
bool has_writeable_rt =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(color_writes & ((1u << state->color_att_count) - 1)) != 0;
      /* Emit 3DSTATE_PS_BLEND so it stays consistent with the rest of the
       * BLEND_STATE_ENTRY setup below.
       */
uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
struct GENX(3DSTATE_PS_BLEND) ps_blend = {
GENX(3DSTATE_PS_BLEND_header),
.HasWriteableRT = has_writeable_rt,
};
GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
pipeline->gfx8.ps_blend);
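/* Build a fresh BLEND_STATE table with the dynamic per-RT write enables
* and merge it with the pipeline's packed blend state.
*/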
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
uint32_t *dws = blend_dws;
memset(blend_dws, 0, sizeof(blend_dws));
/* Skip over the BLEND_STATE header dwords; the per-RT entries are packed after it. */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
/* Disable anything above the current number of color attachments. */
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
(color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_A_BIT) == 0,
.WriteDisableRed = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_R_BIT) == 0,
.WriteDisableGreen = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_G_BIT) == 0,
.WriteDisableBlue = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_B_BIT) == 0,
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
pipeline->gfx8.blend_state, num_dwords, 64);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = blend_states.offset;
bsp.BlendStatePointerValid = true;
}
}
/* When we're done, there is no more dirty gfx state. */
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer->state.gfx.dirty = 0;
}

View File

@ -0,0 +1,265 @@
# Copyright © 2017-2019 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
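# Generate anv_entrypoints.{h,c} from the Vulkan registry XML, including
# entrypoint variants for each hardware generation prefix listed below.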
anv_hasvk_entrypoints = custom_target(
'anv_hasvk_entrypoints',
input : [vk_entrypoints_gen, vk_api_xml],
output : ['anv_entrypoints.h', 'anv_entrypoints.c'],
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'anv',
'--device-prefix', 'gfx7', '--device-prefix', 'gfx75',
'--device-prefix', 'gfx8', '--device-prefix', 'gfx9',
'--device-prefix', 'gfx11', '--device-prefix', 'gfx12',
'--device-prefix', 'gfx125',
],
depend_files : vk_entrypoints_gen_depend_files,
)
intel_hasvk_icd = custom_target(
'intel_hasvk_icd',
input : [vk_icd_gen, vk_api_xml],
output : 'intel_hasvk_icd.@0@.json'.format(host_machine.cpu()),
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
'libvulkan_intel_hasvk.so'),
'--out', '@OUTPUT@',
],
build_by_default : true,
install_dir : with_vulkan_icd_dir,
install : true,
)
if meson.version().version_compare('>= 0.58')
_dev_icdname = 'intel_hasvk_devenv_icd.@0@.json'.format(host_machine.cpu())
custom_target(
'intel_hasvk_devenv_icd',
input : [vk_icd_gen, vk_api_xml],
output : _dev_icdname,
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', meson.current_build_dir() / 'libvulkan_intel_hasvk.so',
'--out', '@OUTPUT@',
],
build_by_default : true,
)
devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
endif
libanv_per_hw_ver_libs = []
anv_per_hw_ver_files = files(
'genX_blorp_exec.c',
'genX_cmd_buffer.c',
'genX_gpu_memcpy.c',
'genX_pipeline.c',
'genX_query.c',
'genX_state.c',
)
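# Compile the genX_* sources once per supported GFX_VERx10 value; each
# per-generation static library is built with its own -DGFX_VERx10 define.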
foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
['80', ['gfx8_cmd_buffer.c']], ['90', ['gfx8_cmd_buffer.c']],
['110', ['gfx8_cmd_buffer.c']], ['120', ['gfx8_cmd_buffer.c']],
['125', ['gfx8_cmd_buffer.c']]]
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
c_args : [
no_override_init_args, c_sse2_args,
'-DGFX_VERx10=@0@'.format(_gfx_ver),
],
gnu_symbol_visibility : 'hidden',
dependencies : [
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
],
)
endforeach
libanv_files = files(
'anv_acceleration_structure.c',
'anv_allocator.c',
'anv_android.h',
'anv_batch_chain.c',
'anv_blorp.c',
'anv_bo_sync.c',
'anv_cmd_buffer.c',
'anv_descriptor_set.c',
'anv_device.c',
'anv_formats.c',
'anv_genX.h',
'anv_image.c',
'anv_measure.c',
'anv_measure.h',
'anv_nir.h',
'anv_nir_add_base_work_group_id.c',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_multiview.c',
'anv_nir_lower_ubo_loads.c',
'anv_nir_lower_ycbcr_textures.c',
'anv_perf.c',
'anv_pipeline.c',
'anv_pipeline_cache.c',
'anv_private.h',
'anv_queue.c',
'anv_util.c',
'anv_utrace.c',
'anv_wsi.c',
)
anv_deps = [
dep_libdrm,
dep_valgrind,
idep_genxml,
idep_nir_headers,
idep_vulkan_util_headers,
idep_vulkan_runtime_headers,
idep_vulkan_wsi_headers,
]
anv_flags = [
no_override_init_args,
c_sse2_args,
]
anv_cpp_flags = []
if with_platform_x11
anv_deps += dep_xcb_dri3
endif
if with_platform_wayland
anv_deps += dep_wayland_client
endif
if with_xlib_lease
anv_deps += [dep_xlib_xrandr]
endif
if with_platform_android
libanv_files += files('anv_android.c')
else
libanv_files += files('anv_android_stubs.c')
endif
anv_deps += idep_intel_driver_ds_headers
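# Generation-independent driver code, linked whole into the final shared
# library and into the test library below.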
libanv_hasvk_common = static_library(
'anv_hasvk_common',
[
libanv_files, anv_hasvk_entrypoints, sha1_h,
gen_xml_pack,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
inc_util,
],
c_args : anv_flags,
cpp_args : anv_cpp_flags,
gnu_symbol_visibility : 'hidden',
dependencies : anv_deps,
)
libvulkan_intel_hasvk = shared_library(
'vulkan_intel_hasvk',
[files('anv_gem.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : [libanv_hasvk_common, libanv_per_hw_ver_libs],
link_with : [
libintel_compiler, libintel_dev, libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
idep_intel_driver_ds,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
install : true,
)
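# Check the exported symbols of the built driver against the shared Vulkan
# ICD symbol list.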
if with_symbols_check
test(
'anv symbols check',
symbols_check,
args : [
'--lib', libvulkan_intel_hasvk,
'--symbols-file', vulkan_icd_symbols,
symbols_check_args,
],
suite : ['intel'],
)
endif
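# The test build swaps in anv_gem_stubs.c for the real GEM backend so the
# allocator unit tests below can run without an Intel GPU.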
if with_tests
libvulkan_intel_hasvk_test = static_library(
'vulkan_intel_hasvk_test',
[files('anv_gem_stubs.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : libanv_hasvk_common,
link_with : [
libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libintel_dev,
libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
idep_mesautil,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
)
foreach t : ['block_pool_no_free', 'block_pool_grow_first',
'state_pool_no_free', 'state_pool_free_list_only',
'state_pool', 'state_pool_padding']
test(
'anv_hasvk_@0@'.format(t),
executable(
t,
['tests/@0@.c'.format(t), anv_hasvk_entrypoints[0]],
c_args : [ c_sse2_args ],
link_with : libvulkan_intel_hasvk_test,
dependencies : [
dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
idep_vulkan_runtime, idep_intel_driver_ds,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
),
suite : ['intel'],
)
endforeach
endif

View File

@ -0,0 +1,67 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "test_common.h"
int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_block_pool pool;
/* Create a pool with initial size smaller than the block allocated, so
* that it must grow in the first allocation.
*/
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, initial_size);
ASSERT(pool.size == initial_size);
uint32_t padding;
int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding);
/* The pool must have grown by at least enough space to fit the new allocation. */
ASSERT(pool.size > initial_size);
ASSERT(pool.size >= initial_size + block_size);
/* The whole initial size is considered padding and the allocation should be
* right next to it.
*/
ASSERT(padding == initial_size);
ASSERT(offset == initial_size);
/* Use the memory to ensure it is valid. */
void *map = anv_block_pool_map(&pool, offset, block_size);
memset(map, 22, block_size);
anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,153 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 16
#define BLOCKS_PER_THREAD 1024
#define NUM_RUNS 64
struct job {
pthread_t thread;
unsigned id;
struct anv_block_pool *pool;
int32_t blocks[BLOCKS_PER_THREAD];
int32_t back_blocks[BLOCKS_PER_THREAD];
} jobs[NUM_THREADS];
static void *alloc_blocks(void *_job)
{
struct job *job = _job;
uint32_t job_id = job - jobs;
uint32_t block_size = 16 * ((job_id % 4) + 1);
int32_t block, *data;
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = anv_block_pool_alloc(job->pool, block_size, NULL);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block >= 0);
job->blocks[i] = block;
block = anv_block_pool_alloc_back(job->pool, block_size);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block < 0);
job->back_blocks[i] = -block;
}
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = job->blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);
block = -job->back_blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);
}
return NULL;
}
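/* Check that, when the per-thread offset lists are merged in order, the
* offsets handed out by the pool are strictly increasing.
*/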
static void validate_monotonic(int32_t **blocks)
{
/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));
int highest = -1;
while (true) {
/* First, we find which thread has the lowest next element */
int32_t thread_min = INT32_MAX;
int min_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= BLOCKS_PER_THREAD)
continue;
if (thread_min > blocks[i][next[i]]) {
thread_min = blocks[i][next[i]];
min_thread_idx = i;
}
}
/* The only way this can happen is if all of the next[] values are at
* BLOCKS_PER_THREAD, in which case, we're done.
*/
if (thread_min == INT32_MAX)
break;
/* That next element had better be higher than the previous highest */
ASSERT(blocks[min_thread_idx][next[min_thread_idx]] > highest);
highest = blocks[min_thread_idx][next[min_thread_idx]];
next[min_thread_idx]++;
}
}
static void run_test()
{
struct anv_physical_device physical_device = {
.use_relocations = true,
};
struct anv_device device = {};
struct anv_block_pool pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, 4096);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
/* Validate that the block allocations were monotonic */
int32_t *block_ptrs[NUM_THREADS];
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].blocks;
validate_monotonic(block_ptrs);
/* Validate that the back block allocations were monotonic */
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].back_blocks;
validate_monotonic(block_ptrs);
anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}
int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

View File

@ -0,0 +1,59 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 10
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)
#define NUM_RUNS 64
#include "state_pool_test_helper.h"
int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
for (unsigned i = 0; i < NUM_RUNS; i++) {
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 256);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
run_state_pool_test(&state_pool);
anv_state_pool_finish(&state_pool);
}
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,68 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 12
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)
#include "state_pool_test_helper.h"
int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
/* Grab and return enough states that the state pool test below won't
* actually ever resize anything.
*/
{
struct anv_state states[NUM_THREADS * STATES_PER_THREAD];
for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) {
states[i] = anv_state_pool_alloc(&state_pool, 16, 16);
ASSERT(states[i].offset != 0);
}
for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++)
anv_state_pool_free(&state_pool, states[i]);
}
run_state_pool_test(&state_pool);
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,119 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 16
#define STATES_PER_THREAD 1024
#define NUM_RUNS 64
struct job {
pthread_t thread;
unsigned id;
struct anv_state_pool *pool;
uint32_t offsets[STATES_PER_THREAD];
} jobs[NUM_THREADS];
pthread_barrier_t barrier;
static void *alloc_states(void *_job)
{
struct job *job = _job;
pthread_barrier_wait(&barrier);
for (unsigned i = 0; i < STATES_PER_THREAD; i++) {
struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16);
job->offsets[i] = state.offset;
}
return NULL;
}
static void run_test()
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 64);
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));
int highest = -1;
while (true) {
/* First, we find which thread has the highest next element */
int thread_max = -1;
int max_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= STATES_PER_THREAD)
continue;
if (thread_max < jobs[i].offsets[next[i]]) {
thread_max = jobs[i].offsets[next[i]];
max_thread_idx = i;
}
}
/* The only way this can happen is if all of the next[] values are at
* STATES_PER_THREAD, in which case, we're done.
*/
if (thread_max == -1)
break;
/* That next element had better be higher than the previous highest */
ASSERT(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest);
highest = jobs[max_thread_idx].offsets[next[max_thread_idx]];
next[max_thread_idx]++;
}
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}
int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

View File

@ -0,0 +1,79 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "test_common.h"
int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
/* Get the size of the underlying block_pool */
struct anv_block_pool *bp = &state_pool.block_pool;
uint64_t pool_size = bp->size;
/* Grab one so the pool has some initial usage */
anv_state_pool_alloc(&state_pool, 16, 16);
/* Grab a state that is the size of the initial allocation */
struct anv_state state = anv_state_pool_alloc(&state_pool, pool_size, 16);
/* The pool must have grown */
ASSERT(bp->size > pool_size);
/* And the state must have been allocated right at the end of the original pool size */
ASSERT(state.offset == pool_size);
/* A new allocation that fits into the returned empty space should have an
* offset within the original pool size
*/
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);
/* We should be able to allocate block-sized (4096 byte) chunks in the
* returned area.
*/
int left_chunks = pool_size / 4096 - 2;
for (int i = 0; i < left_chunks; i++) {
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);
}
/* Now the next chunk to be allocated should make the pool grow again */
pool_size = bp->size;
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(bp->size > pool_size);
ASSERT(state.offset == pool_size);
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,71 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
struct job {
struct anv_state_pool *pool;
unsigned id;
pthread_t thread;
} jobs[NUM_THREADS];
pthread_barrier_t barrier;
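/* Each thread allocates a chunk of 16-byte states, writes to them, then
* frees the whole chunk; the number of states per chunk varies with the
* thread id.
*/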
static void *alloc_states(void *void_job)
{
struct job *job = void_job;
const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2);
const unsigned num_chunks = STATES_PER_THREAD / chunk_size;
struct anv_state states[chunk_size];
pthread_barrier_wait(&barrier);
for (unsigned c = 0; c < num_chunks; c++) {
for (unsigned i = 0; i < chunk_size; i++) {
states[i] = anv_state_pool_alloc(job->pool, 16, 16);
memset(states[i].map, 139, 16);
ASSERT(states[i].offset != 0);
}
for (unsigned i = 0; i < chunk_size; i++)
anv_state_pool_free(job->pool, states[i]);
}
return NULL;
}
static void run_state_pool_test(struct anv_state_pool *state_pool)
{
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
}

View File

@ -0,0 +1,34 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
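/* Minimal assertion helper shared by the allocator tests: print the failing
* condition with file and line, then abort.
*/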
#define ASSERT(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "%s:%d: Test assertion `%s` failed.\n", \
__FILE__, __LINE__, # cond); \
abort(); \
} \
} while (false)