intel: add a hasvk vulkan driver

This new driver is a copy of the current Anv code, but it will only
load on gfx7/8 platforms.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Lionel Landwerlin 2022-08-03 11:43:36 +03:00 committed by Marge Bot
parent 0013ef89bf
commit 50013ca9a5
54 changed files with 49575 additions and 4 deletions


@@ -250,7 +250,7 @@ _vulkan_drivers = get_option('vulkan-drivers')
 if _vulkan_drivers.contains('auto')
   if system_has_kms_drm
     if host_machine.cpu_family().startswith('x86')
-      _vulkan_drivers = ['amd', 'intel', 'swrast']
+      _vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast']
     elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
       _vulkan_drivers = ['swrast']
     elif ['mips', 'mips64', 'riscv32', 'riscv64'].contains(host_machine.cpu_family())
@@ -269,6 +269,7 @@ if _vulkan_drivers.contains('auto')
 endif
 with_intel_vk = _vulkan_drivers.contains('intel')
+with_intel_hasvk = _vulkan_drivers.contains('intel_hasvk')
 with_amd_vk = _vulkan_drivers.contains('amd')
 with_freedreno_vk = _vulkan_drivers.contains('freedreno')
 with_panfrost_vk = _vulkan_drivers.contains('panfrost')
@@ -283,7 +284,7 @@ with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
 with_any_vk = _vulkan_drivers.length() != 0
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus or with_intel_tools
+with_any_intel = with_intel_vk or with_intel_hasvk or with_gallium_iris or with_gallium_crocus or with_intel_tools
 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1549,7 +1550,7 @@ endif
 if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
-elif with_intel_vk
+elif with_intel_vk or with_intel_hasvk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
 endif


@@ -198,7 +198,7 @@ option(
   'vulkan-drivers',
   type : 'array',
   value : ['auto'],
-  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
+  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'intel_hasvk', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
   description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(


@@ -38,3 +38,6 @@ endif
 if with_intel_vk
   subdir('vulkan')
 endif
+if with_intel_hasvk
+  subdir('vulkan_hasvk')
+endif


@@ -0,0 +1,13 @@
Intel Vulkan ToDo
=================

Missing Features:
- Investigate CTS failures on HSW
- Sparse memory

Performance:
- Multi-{sampled/gfx8,LOD} HiZ
- MSAA fast clears
- Pushing pieces of UBOs?
- Enable guardband clipping
- Use soft-pin to avoid relocations


@@ -0,0 +1,251 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
void
anv_GetAccelerationStructureBuildSizesKHR(
VkDevice device,
VkAccelerationStructureBuildTypeKHR buildType,
const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
const uint32_t* pMaxPrimitiveCounts,
VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
assert(pSizeInfo->sType ==
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);
pSizeInfo->accelerationStructureSize = 0; /* TODO */
uint64_t cpu_build_scratch_size = 0; /* TODO */
uint64_t cpu_update_scratch_size = cpu_build_scratch_size;
uint64_t gpu_build_scratch_size = 0; /* TODO */
uint64_t gpu_update_scratch_size = gpu_build_scratch_size;
switch (buildType) {
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR:
pSizeInfo->buildScratchSize = cpu_build_scratch_size;
pSizeInfo->updateScratchSize = cpu_update_scratch_size;
break;
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR:
pSizeInfo->buildScratchSize = gpu_build_scratch_size;
pSizeInfo->updateScratchSize = gpu_update_scratch_size;
break;
case VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_OR_DEVICE_KHR:
pSizeInfo->buildScratchSize = MAX2(cpu_build_scratch_size,
gpu_build_scratch_size);
pSizeInfo->updateScratchSize = MAX2(cpu_update_scratch_size,
gpu_update_scratch_size);
break;
default:
unreachable("Invalid acceleration structure build type");
}
}
VkResult
anv_CreateAccelerationStructureKHR(
VkDevice _device,
const VkAccelerationStructureCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkAccelerationStructureKHR* pAccelerationStructure)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
struct anv_acceleration_structure *accel;
accel = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (accel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &accel->base,
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);
accel->size = pCreateInfo->size;
accel->address = anv_address_add(buffer->address, pCreateInfo->offset);
*pAccelerationStructure = anv_acceleration_structure_to_handle(accel);
return VK_SUCCESS;
}
void
anv_DestroyAccelerationStructureKHR(
VkDevice _device,
VkAccelerationStructureKHR accelerationStructure,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_acceleration_structure, accel, accelerationStructure);
if (!accel)
return;
vk_object_base_finish(&accel->base);
vk_free2(&device->vk.alloc, pAllocator, accel);
}
VkDeviceAddress
anv_GetAccelerationStructureDeviceAddressKHR(
VkDevice device,
const VkAccelerationStructureDeviceAddressInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_acceleration_structure, accel,
pInfo->accelerationStructure);
assert(!anv_address_is_null(accel->address));
assert(anv_bo_is_pinned(accel->address.bo));
return anv_address_physical(accel->address);
}
void
anv_GetDeviceAccelerationStructureCompatibilityKHR(
VkDevice device,
const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
unreachable("Unimplemented");
}
VkResult
anv_BuildAccelerationStructuresKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyAccelerationStructureKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyAccelerationStructureToMemoryKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_CopyMemoryToAccelerationStructureKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkResult
anv_WriteAccelerationStructuresPropertiesKHR(
VkDevice _device,
uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR* pAccelerationStructures,
VkQueryType queryType,
size_t dataSize,
void* pData,
size_t stride)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
void
anv_CmdBuildAccelerationStructuresKHR(
VkCommandBuffer commandBuffer,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
unreachable("Unimplemented");
}
void
anv_CmdBuildAccelerationStructuresIndirectKHR(
VkCommandBuffer commandBuffer,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
const VkDeviceAddress* pIndirectDeviceAddresses,
const uint32_t* pIndirectStrides,
const uint32_t* const* ppMaxPrimitiveCounts)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyAccelerationStructureKHR(
VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyAccelerationStructureToMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdCopyMemoryToAccelerationStructureKHR(
VkCommandBuffer commandBuffer,
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
unreachable("Unimplemented");
}
void
anv_CmdWriteAccelerationStructuresPropertiesKHR(
VkCommandBuffer commandBuffer,
uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR* pAccelerationStructures,
VkQueryType queryType,
VkQueryPool queryPool,
uint32_t firstQuery)
{
unreachable("Unimplemented");
}

File diff suppressed because it is too large.


@@ -0,0 +1,792 @@
/*
* Copyright © 2017, Google Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <hardware/gralloc.h>
#if ANDROID_API_LEVEL >= 26
#include <hardware/gralloc1.h>
#endif
#include <hardware/hardware.h>
#include <hardware/hwvulkan.h>
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <sync/sync.h>
#include "anv_private.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
static int anv_hal_close(struct hw_device_t *dev);
static void UNUSED
static_asserts(void)
{
STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
}
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
.common = {
.tag = HARDWARE_MODULE_TAG,
.module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
.hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
.id = HWVULKAN_HARDWARE_MODULE_ID,
.name = "Intel Vulkan HAL",
.author = "Intel",
.methods = &(hw_module_methods_t) {
.open = anv_hal_open,
},
},
};
/* If any bits in test_mask are set, then unset them and return true. */
static inline bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
uint32_t orig_mask = *inout_mask;
*inout_mask &= ~test_mask;
return *inout_mask != orig_mask;
}
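/* Illustrative sketch, not part of the original change: how unmask32() peels
 * the tested bits off a usage mask, mirroring the way setup_gralloc0_usage()
 * further down consumes VkImageUsageFlags one group at a time. The example
 * function name and the chosen usage bits are hypothetical.
 */
static uint32_t UNUSED
unmask32_example(void)
{
   uint32_t usage = VK_IMAGE_USAGE_SAMPLED_BIT |
                    VK_IMAGE_USAGE_TRANSFER_DST_BIT;

   /* SAMPLED is set: it gets cleared from 'usage' and true is returned. */
   ASSERTED bool had_sampled = unmask32(&usage, VK_IMAGE_USAGE_SAMPLED_BIT);
   assert(had_sampled);

   /* STORAGE was never set: 'usage' is left untouched and false is returned. */
   ASSERTED bool had_storage = unmask32(&usage, VK_IMAGE_USAGE_STORAGE_BIT);
   assert(!had_storage);

   return usage; /* == VK_IMAGE_USAGE_TRANSFER_DST_BIT */
}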
static int
anv_hal_open(const struct hw_module_t* mod, const char* id,
struct hw_device_t** dev)
{
assert(mod == &HAL_MODULE_INFO_SYM.common);
assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
if (!hal_dev)
return -1;
*hal_dev = (hwvulkan_device_t) {
.common = {
.tag = HARDWARE_DEVICE_TAG,
.version = HWVULKAN_DEVICE_API_VERSION_0_1,
.module = &HAL_MODULE_INFO_SYM.common,
.close = anv_hal_close,
},
.EnumerateInstanceExtensionProperties = anv_EnumerateInstanceExtensionProperties,
.CreateInstance = anv_CreateInstance,
.GetInstanceProcAddr = anv_GetInstanceProcAddr,
};
*dev = &hal_dev->common;
return 0;
}
static int
anv_hal_close(struct hw_device_t *dev)
{
/* hwvulkan.h claims that hw_device_t::close() is never called. */
return -1;
}
#if ANDROID_API_LEVEL >= 26
#include <vndk/hardware_buffer.h>
/* See i915_private_android_types.h in minigbm. */
#define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100
enum {
/* Usage bit equal to GRALLOC_USAGE_HW_CAMERA_MASK */
BUFFER_USAGE_CAMERA_MASK = 0x00060000U,
};
inline VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage)
{
switch (android_format) {
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
return VK_FORMAT_R8G8B8A8_UNORM;
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
return VK_FORMAT_R8G8B8_UNORM;
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
return VK_FORMAT_R16G16B16A16_SFLOAT;
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL:
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
if (android_usage & BUFFER_USAGE_CAMERA_MASK)
return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
else
return VK_FORMAT_R8G8B8_UNORM;
case AHARDWAREBUFFER_FORMAT_BLOB:
default:
return VK_FORMAT_UNDEFINED;
}
}
static inline unsigned
android_format_from_vk(unsigned vk_format)
{
switch (vk_format) {
case VK_FORMAT_R8G8B8A8_UNORM:
return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_R8G8B8_UNORM:
return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM;
case VK_FORMAT_R16G16B16A16_SFLOAT:
return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT;
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
#ifdef HAVE_CROS_GRALLOC
return AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420;
#else
return HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL;
#endif
default:
return AHARDWAREBUFFER_FORMAT_BLOB;
}
}
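/* Illustrative sketch, not part of the original change: formats with an exact
 * AHardwareBuffer equivalent are meant to round-trip through the two tables
 * above, while everything else falls back to BLOB / UNDEFINED. The example
 * function name is hypothetical.
 */
static void UNUSED
format_mapping_example(void)
{
   ASSERTED VkFormat vk =
      vk_format_from_android(AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM,
                             0 /* usage */);
   assert(vk == VK_FORMAT_R8G8B8A8_UNORM);
   assert(android_format_from_vk(vk) == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM);

   /* No AHardwareBuffer equivalent: falls through to the default cases. */
   assert(android_format_from_vk(VK_FORMAT_R64_SFLOAT) ==
          AHARDWAREBUFFER_FORMAT_BLOB);
}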
static VkFormatFeatureFlags
features2_to_features(VkFormatFeatureFlags2 features2)
{
return features2 & VK_ALL_FORMAT_FEATURE_FLAG_BITS;
}
static VkResult
get_ahw_buffer_format_properties2(
VkDevice device_h,
const struct AHardwareBuffer *buffer,
VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
/* Get a description of the buffer contents. */
AHardwareBuffer_Desc desc;
AHardwareBuffer_describe(buffer, &desc);
/* Verify description. */
uint64_t gpu_usage =
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
/* "Buffer must be a valid Android hardware buffer object with at least
* one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
*/
if (!(desc.usage & (gpu_usage)))
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
/* Fill properties fields based on description. */
VkAndroidHardwareBufferFormatProperties2ANDROID *p = pProperties;
p->format = vk_format_from_android(desc.format, desc.usage);
const struct anv_format *anv_format = anv_get_format(p->format);
p->externalFormat = (uint64_t) (uintptr_t) anv_format;
/* Default to OPTIMAL tiling but set to linear in case
* of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage.
*/
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL;
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
tiling = VK_IMAGE_TILING_LINEAR;
p->formatFeatures =
anv_get_image_format_features2(device->info, p->format, anv_format,
tiling, NULL);
/* "Images can be created with an external format even if the Android hardware
* buffer has a format which has an equivalent Vulkan format to enable
* consistent handling of images from sources that might use either category
* of format. However, all images created with an external format are subject
* to the valid usage requirements associated with external formats, even if
* the Android hardware buffers format has a Vulkan equivalent."
*
* "The formatFeatures member *must* include
* VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
* VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
* VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
*/
p->formatFeatures |=
VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
/* "Implementations may not always be able to determine the color model,
* numerical range, or chroma offsets of the image contents, so the values
* in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
* Applications should treat these values as sensible defaults to use in
* the absence of more reliable information obtained through some other
* means."
*/
p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
return VK_SUCCESS;
}
VkResult
anv_GetAndroidHardwareBufferPropertiesANDROID(
VkDevice device_h,
const struct AHardwareBuffer *buffer,
VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
ANV_FROM_HANDLE(anv_device, dev, device_h);
VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
vk_find_struct(pProperties->pNext,
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
/* Fill format properties of an Android hardware buffer. */
if (format_prop) {
VkAndroidHardwareBufferFormatProperties2ANDROID format_prop2 = {
.sType = VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID,
};
get_ahw_buffer_format_properties2(device_h, buffer, &format_prop2);
format_prop->format = format_prop2.format;
format_prop->externalFormat = format_prop2.externalFormat;
format_prop->formatFeatures =
features2_to_features(format_prop2.formatFeatures);
format_prop->samplerYcbcrConversionComponents =
format_prop2.samplerYcbcrConversionComponents;
format_prop->suggestedYcbcrModel = format_prop2.suggestedYcbcrModel;
format_prop->suggestedYcbcrRange = format_prop2.suggestedYcbcrRange;
format_prop->suggestedXChromaOffset = format_prop2.suggestedXChromaOffset;
format_prop->suggestedYChromaOffset = format_prop2.suggestedYChromaOffset;
}
VkAndroidHardwareBufferFormatProperties2ANDROID *format_prop2 =
vk_find_struct(pProperties->pNext,
ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_2_ANDROID);
if (format_prop2)
get_ahw_buffer_format_properties2(device_h, buffer, format_prop2);
/* NOTE - We support buffers with only one handle but do not error on
* multiple handle case. Reason is that we want to support YUV formats
* where we have many logical planes but they all point to the same
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
*/
const native_handle_t *handle =
AHardwareBuffer_getNativeHandle(buffer);
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
if (dma_buf < 0)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
/* All memory types. */
uint32_t memory_types = (1ull << dev->physical->memory.type_count) - 1;
pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
pProperties->memoryTypeBits = memory_types;
return VK_SUCCESS;
}
VkResult
anv_GetMemoryAndroidHardwareBufferANDROID(
VkDevice device_h,
const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
struct AHardwareBuffer **pBuffer)
{
ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory);
/* Some quotes from Vulkan spec:
*
* "If the device memory was created by importing an Android hardware
* buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
* Android hardware buffer object."
*
* "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
* have been included in VkExportMemoryAllocateInfo::handleTypes when
* memory was created."
*/
if (mem->ahw) {
*pBuffer = mem->ahw;
/* Increase refcount. */
AHardwareBuffer_acquire(mem->ahw);
return VK_SUCCESS;
}
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
#endif
/* Construct ahw usage mask from image usage bits, see
* 'AHardwareBuffer Usage Equivalence' in Vulkan spec.
*/
uint64_t
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage)
{
uint64_t ahw_usage = 0;
#if ANDROID_API_LEVEL >= 26
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT;
if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP;
if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT)
ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT;
/* No usage bits set - set at least one GPU usage. */
if (ahw_usage == 0)
ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE;
#endif
return ahw_usage;
}
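/* Illustrative sketch, not part of the original change: per the mapping above,
 * a sampled color attachment requests both GPU_SAMPLED_IMAGE and
 * GPU_COLOR_OUTPUT, and an empty usage still gets at least one GPU bit. The
 * example function name is hypothetical.
 */
static void UNUSED
ahw_usage_example(void)
{
#if ANDROID_API_LEVEL >= 26
   ASSERTED uint64_t ahw =
      anv_ahw_usage_from_vk_usage(0 /* create flags */,
                                  VK_IMAGE_USAGE_SAMPLED_BIT |
                                  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
   assert(ahw == (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
                  AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT));

   assert(anv_ahw_usage_from_vk_usage(0, 0) ==
          AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE);
#endif
}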
/*
* Called from anv_AllocateMemory when importing an AHardwareBuffer.
*/
VkResult
anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info)
{
#if ANDROID_API_LEVEL >= 26
ANV_FROM_HANDLE(anv_device, device, device_h);
/* Import from AHardwareBuffer to anv_device_memory. */
const native_handle_t *handle =
AHardwareBuffer_getNativeHandle(info->buffer);
/* NOTE - We support buffers with only one handle but do not error on
* multiple handle case. Reason is that we want to support YUV formats
* where we have many logical planes but they all point to the same
* buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
*/
int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
if (dma_buf < 0)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
VkResult result = anv_device_import_bo(device, dma_buf, 0,
0 /* client_address */,
&mem->bo);
assert(result == VK_SUCCESS);
/* "If the vkAllocateMemory command succeeds, the implementation must
* acquire a reference to the imported hardware buffer, which it must
* release when the device memory object is freed. If the command fails,
* the implementation must not retain a reference."
*/
AHardwareBuffer_acquire(info->buffer);
mem->ahw = info->buffer;
return VK_SUCCESS;
#else
return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo)
{
#if ANDROID_API_LEVEL >= 26
const VkMemoryDedicatedAllocateInfo *dedicated_info =
vk_find_struct_const(pAllocateInfo->pNext,
MEMORY_DEDICATED_ALLOCATE_INFO);
uint32_t w = 0;
uint32_t h = 1;
uint32_t layers = 1;
uint32_t format = 0;
uint64_t usage = 0;
/* If caller passed dedicated information. */
if (dedicated_info && dedicated_info->image) {
ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
w = image->vk.extent.width;
h = image->vk.extent.height;
layers = image->vk.array_layers;
format = android_format_from_vk(image->vk.format);
usage = anv_ahw_usage_from_vk_usage(image->vk.create_flags, image->vk.usage);
} else if (dedicated_info && dedicated_info->buffer) {
ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer);
w = buffer->vk.size;
format = AHARDWAREBUFFER_FORMAT_BLOB;
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
} else {
w = pAllocateInfo->allocationSize;
format = AHARDWAREBUFFER_FORMAT_BLOB;
usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
}
struct AHardwareBuffer *ahw = NULL;
struct AHardwareBuffer_Desc desc = {
.width = w,
.height = h,
.layers = layers,
.format = format,
.usage = usage,
};
if (AHardwareBuffer_allocate(&desc, &ahw) != 0)
return VK_ERROR_OUT_OF_HOST_MEMORY;
const VkImportAndroidHardwareBufferInfoANDROID import_info = {
.buffer = ahw,
};
VkResult result = anv_import_ahw_memory(device_h, mem, &import_info);
/* Release a reference to avoid leak for AHB allocation. */
AHardwareBuffer_release(ahw);
return result;
#else
return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info)
{
struct anv_bo *bo = NULL;
VkResult result;
struct anv_image_create_info anv_info = {
.vk_info = base_info,
.isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT,
};
if (gralloc_info->handle->numFds != 1) {
return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"VkNativeBufferANDROID::handle::numFds is %d, "
"expected 1", gralloc_info->handle->numFds);
}
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
* must exceed that of the gralloc handle, and we do not own the gralloc
* handle.
*/
int dma_buf = gralloc_info->handle->data[0];
/* We need to set the WRITE flag on window system buffers so that GEM will
* know we're writing to them and synchronize uses on other rings (for
* example, if the display server uses the blitter ring).
*
* If this function fails and if the imported bo was resident in the cache,
* we should avoid updating the bo's flags. Therefore, we defer updating
* the flags until success is certain.
*
*/
result = anv_device_import_bo(device, dma_buf,
ANV_BO_ALLOC_IMPLICIT_SYNC |
ANV_BO_ALLOC_IMPLICIT_WRITE,
0 /* client_address */,
&bo);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to import dma-buf from VkNativeBufferANDROID");
}
enum isl_tiling tiling;
result = anv_device_get_bo_tiling(device, bo, &tiling);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to get tiling from VkNativeBufferANDROID");
}
anv_info.isl_tiling_flags = 1u << tiling;
enum isl_format format = anv_get_isl_format(device->info,
base_info->format,
VK_IMAGE_ASPECT_COLOR_BIT,
base_info->tiling);
assert(format != ISL_FORMAT_UNSUPPORTED);
result = anv_image_init(device, image, &anv_info);
if (result != VK_SUCCESS)
goto fail_init;
VkMemoryRequirements2 mem_reqs = {
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
};
anv_image_get_memory_requirements(device, image, image->vk.aspects,
&mem_reqs);
VkDeviceSize aligned_image_size =
align_u64(mem_reqs.memoryRequirements.size,
mem_reqs.memoryRequirements.alignment);
if (bo->size < aligned_image_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"dma-buf from VkNativeBufferANDROID is too small for "
"VkImage: %"PRIu64"B < %"PRIu64"B",
bo->size, aligned_image_size);
goto fail_size;
}
assert(!image->disjoint);
assert(image->n_planes == 1);
assert(image->planes[0].primary_surface.memory_range.binding ==
ANV_IMAGE_MEMORY_BINDING_MAIN);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
image->from_gralloc = true;
return VK_SUCCESS;
fail_size:
anv_image_finish(image);
fail_init:
anv_device_release_bo(device, bo);
return result;
}
VkResult
anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info)
{
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
* must exceed that of the gralloc handle, and we do not own the gralloc
* handle.
*/
int dma_buf = gralloc_info->handle->data[0];
/* We need to set the WRITE flag on window system buffers so that GEM will
* know we're writing to them and synchronize uses on other rings (for
* example, if the display server uses the blitter ring).
*
* If this function fails and if the imported bo was resident in the cache,
* we should avoid updating the bo's flags. Therefore, we defer updating
* the flags until success is certain.
*
*/
struct anv_bo *bo = NULL;
VkResult result = anv_device_import_bo(device, dma_buf,
ANV_BO_ALLOC_IMPLICIT_SYNC |
ANV_BO_ALLOC_IMPLICIT_WRITE,
0 /* client_address */,
&bo);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"failed to import dma-buf from VkNativeBufferANDROID");
}
uint64_t img_size = image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].memory_range.size;
if (bo->size < img_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"dma-buf from VkNativeBufferANDROID is too small for "
"VkImage: %"PRIu64"B < %"PRIu64"B",
bo->size, img_size);
anv_device_release_bo(device, bo);
return result;
}
assert(!image->disjoint);
assert(image->n_planes == 1);
assert(image->planes[0].primary_surface.memory_range.binding ==
ANV_IMAGE_MEMORY_BINDING_MAIN);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo == NULL);
assert(image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.offset == 0);
image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].address.bo = bo;
image->from_gralloc = true;
return VK_SUCCESS;
}
static VkResult
format_supported_with_usage(VkDevice device_h, VkFormat format,
VkImageUsageFlags imageUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkPhysicalDevice phys_dev_h = anv_physical_device_to_handle(device->physical);
VkResult result;
const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
.format = format,
.type = VK_IMAGE_TYPE_2D,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = imageUsage,
};
VkImageFormatProperties2 image_format_props = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
};
/* Check that requested format and usage are supported. */
result = anv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
&image_format_info, &image_format_props);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"anv_GetPhysicalDeviceImageFormatProperties2 failed "
"inside %s", __func__);
}
return VK_SUCCESS;
}
static VkResult
setup_gralloc0_usage(struct anv_device *device, VkFormat format,
VkImageUsageFlags imageUsage, int *grallocUsage)
{
/* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
* returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
* The relevant code in libvulkan/swapchain.cpp contains this fun comment:
*
* TODO(jessehall): I think these are right, but haven't thought hard
* about it. Do we need to query the driver for support of any of
* these?
*
* Any disagreement between this function and the hardcoded
* VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
* dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
*/
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
/* All VkImageUsageFlags not explicitly checked here are unsupported for
* gralloc swapchains.
*/
if (imageUsage != 0) {
return vk_errorf(device, VK_ERROR_FORMAT_NOT_SUPPORTED,
"unsupported VkImageUsageFlags(0x%x) for gralloc "
"swapchain", imageUsage);
}
/* The below formats support GRALLOC_USAGE_HW_FB (that is, display
* scanout). This short list of formats is universally supported on Intel
* but is incomplete. The full set of supported formats is dependent on
* kernel and hardware.
*
* FINISHME: Advertise all display-supported formats.
*/
switch (format) {
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_R8G8B8A8_SRGB:
*grallocUsage |= GRALLOC_USAGE_HW_FB |
GRALLOC_USAGE_HW_COMPOSER |
GRALLOC_USAGE_EXTERNAL_DISP;
break;
default:
mesa_logw("%s: unsupported format=%d", __func__, format);
}
if (*grallocUsage == 0)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
return VK_SUCCESS;
}
#if ANDROID_API_LEVEL >= 26
VkResult anv_GetSwapchainGrallocUsage2ANDROID(
VkDevice device_h,
VkFormat format,
VkImageUsageFlags imageUsage,
VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
uint64_t* grallocConsumerUsage,
uint64_t* grallocProducerUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkResult result;
*grallocConsumerUsage = 0;
*grallocProducerUsage = 0;
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
result = format_supported_with_usage(device_h, format, imageUsage);
if (result != VK_SUCCESS)
return result;
int32_t grallocUsage = 0;
result = setup_gralloc0_usage(device, format, imageUsage, &grallocUsage);
if (result != VK_SUCCESS)
return result;
/* Setup gralloc1 usage flags from gralloc0 flags. */
if (grallocUsage & GRALLOC_USAGE_HW_RENDER) {
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
}
if (grallocUsage & GRALLOC_USAGE_HW_TEXTURE) {
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
}
if (grallocUsage & (GRALLOC_USAGE_HW_FB |
GRALLOC_USAGE_HW_COMPOSER |
GRALLOC_USAGE_EXTERNAL_DISP)) {
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
}
return VK_SUCCESS;
}
#endif
VkResult anv_GetSwapchainGrallocUsageANDROID(
VkDevice device_h,
VkFormat format,
VkImageUsageFlags imageUsage,
int* grallocUsage)
{
ANV_FROM_HANDLE(anv_device, device, device_h);
VkResult result;
*grallocUsage = 0;
mesa_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
result = format_supported_with_usage(device_h, format, imageUsage);
if (result != VK_SUCCESS)
return result;
return setup_gralloc0_usage(device, format, imageUsage, grallocUsage);
}


@@ -0,0 +1,57 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef ANV_ANDROID_H
#define ANV_ANDROID_H
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
#include <vndk/hardware_buffer.h>
#endif
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>
#include <vulkan/vk_android_native_buffer.h>
struct anv_device_memory;
struct anv_device;
struct anv_image;
VkResult anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info);
VkResult anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info);
uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage);
VkResult anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info);
VkResult anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo);
#endif /* ANV_ANDROID_H */


@@ -0,0 +1,63 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_android.h"
VkResult
anv_image_init_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
VkResult anv_image_bind_from_gralloc(struct anv_device *device,
struct anv_image *image,
const VkNativeBufferANDROID *gralloc_info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
uint64_t
anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create,
const VkImageUsageFlags vk_usage)
{
return 0;
}
VkResult
anv_import_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkImportAndroidHardwareBufferInfoANDROID *info)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
VkResult
anv_create_ahw_memory(VkDevice device_h,
struct anv_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo)
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,237 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "util/os_time.h"
static struct anv_bo_sync *
to_anv_bo_sync(struct vk_sync *sync)
{
assert(sync->type == &anv_bo_sync_type);
return container_of(sync, struct anv_bo_sync, sync);
}
static VkResult
anv_bo_sync_init(struct vk_device *vk_device,
struct vk_sync *vk_sync,
uint64_t initial_value)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
sync->state = initial_value ? ANV_BO_SYNC_STATE_SIGNALED :
ANV_BO_SYNC_STATE_RESET;
return anv_device_alloc_bo(device, "bo-sync", 4096,
ANV_BO_ALLOC_EXTERNAL |
ANV_BO_ALLOC_IMPLICIT_SYNC,
0 /* explicit_address */,
&sync->bo);
}
static void
anv_bo_sync_finish(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
anv_device_release_bo(device, sync->bo);
}
static VkResult
anv_bo_sync_reset(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
sync->state = ANV_BO_SYNC_STATE_RESET;
return VK_SUCCESS;
}
static int64_t
anv_get_relative_timeout(uint64_t abs_timeout)
{
uint64_t now = os_time_get_nano();
/* We don't want negative timeouts.
*
* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is
* supposed to block indefinitely for timeouts < 0. Unfortunately,
* this was broken for a couple of kernel releases. Since there's
* no way to know whether or not the kernel we're using is one of
* the broken ones, the best we can do is to clamp the timeout to
* INT64_MAX. This limits the maximum timeout from 584 years to
* 292 years - likely not a big deal.
*/
if (abs_timeout < now)
return 0;
uint64_t rel_timeout = abs_timeout - now;
if (rel_timeout > (uint64_t) INT64_MAX)
rel_timeout = INT64_MAX;
return rel_timeout;
}
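/* Illustrative arithmetic for the clamp above, not part of the original
 * change:
 *
 *   INT64_MAX ns  = 9,223,372,036,854,775,807 ns ~ 9.2e9 s  ~ 292.5 years
 *   UINT64_MAX ns = 18,446,744,073,709,551,615 ns ~ 1.8e10 s ~ 584.9 years
 *
 * so clamping the unsigned relative timeout to INT64_MAX halves the maximum
 * representable wait from roughly 584 to 292 years, which is still far longer
 * than any timeout an application can reasonably request.
 */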
static VkResult
anv_bo_sync_wait(struct vk_device *vk_device,
uint32_t wait_count,
const struct vk_sync_wait *waits,
enum vk_sync_wait_flags wait_flags,
uint64_t abs_timeout_ns)
{
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
VkResult result;
uint32_t pending = wait_count;
while (pending) {
pending = 0;
bool signaled = false;
for (uint32_t i = 0; i < wait_count; i++) {
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
switch (sync->state) {
case ANV_BO_SYNC_STATE_RESET:
/* This fence hasn't been submitted yet, we'll catch it the next
* time around. Yes, this may mean we dead-loop but, short of
* lots of locking and a condition variable, there's not much that
* we can do about that.
*/
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
pending++;
continue;
case ANV_BO_SYNC_STATE_SIGNALED:
/* This fence is not pending. If waitAll isn't set, we can return
* early. Otherwise, we have to keep going.
*/
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
continue;
case ANV_BO_SYNC_STATE_SUBMITTED:
/* These are the fences we really care about. Go ahead and wait
* on it until we hit a timeout.
*/
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
uint64_t rel_timeout = anv_get_relative_timeout(abs_timeout_ns);
result = anv_device_wait(device, sync->bo, rel_timeout);
/* This also covers VK_TIMEOUT */
if (result != VK_SUCCESS)
return result;
sync->state = ANV_BO_SYNC_STATE_SIGNALED;
signaled = true;
}
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
break;
default:
unreachable("Invalid BO sync state");
}
}
if (pending && !signaled) {
/* If we've hit this then someone decided to vkWaitForFences before
* they've actually submitted any of them to a queue. This is a
* fairly pessimal case, so it's ok to lock here and use a standard
* pthreads condition variable.
*/
pthread_mutex_lock(&device->mutex);
/* It's possible that some of the fences have changed state since the
* last time we checked. Now that we have the lock, check for
* pending fences again and don't wait if it's changed.
*/
uint32_t now_pending = 0;
for (uint32_t i = 0; i < wait_count; i++) {
struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
if (sync->state == ANV_BO_SYNC_STATE_RESET)
now_pending++;
}
assert(now_pending <= pending);
if (now_pending == pending) {
struct timespec abstime = {
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
};
ASSERTED int ret;
ret = pthread_cond_timedwait(&device->queue_submit,
&device->mutex, &abstime);
assert(ret != EINVAL);
if (os_time_get_nano() >= abs_timeout_ns) {
pthread_mutex_unlock(&device->mutex);
return VK_TIMEOUT;
}
}
pthread_mutex_unlock(&device->mutex);
}
}
return VK_SUCCESS;
}
const struct vk_sync_type anv_bo_sync_type = {
.size = sizeof(struct anv_bo_sync),
.features = VK_SYNC_FEATURE_BINARY |
VK_SYNC_FEATURE_GPU_WAIT |
VK_SYNC_FEATURE_GPU_MULTI_WAIT |
VK_SYNC_FEATURE_CPU_WAIT |
VK_SYNC_FEATURE_CPU_RESET |
VK_SYNC_FEATURE_WAIT_ANY |
VK_SYNC_FEATURE_WAIT_PENDING,
.init = anv_bo_sync_init,
.finish = anv_bo_sync_finish,
.reset = anv_bo_sync_reset,
.wait_many = anv_bo_sync_wait,
};
VkResult
anv_create_sync_for_memory(struct vk_device *device,
VkDeviceMemory memory,
bool signal_memory,
struct vk_sync **sync_out)
{
ANV_FROM_HANDLE(anv_device_memory, mem, memory);
struct anv_bo_sync *bo_sync;
bo_sync = vk_zalloc(&device->alloc, sizeof(*bo_sync), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (bo_sync == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
bo_sync->sync.type = &anv_bo_sync_type;
bo_sync->state = signal_memory ? ANV_BO_SYNC_STATE_RESET :
ANV_BO_SYNC_STATE_SUBMITTED;
bo_sync->bo = anv_bo_ref(mem->bo);
*sync_out = &bo_sync->sync;
return VK_SUCCESS;
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,405 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "common/intel_defines.h"
#include "common/intel_gem.h"
/**
* Wrapper around DRM_IOCTL_I915_GEM_CREATE.
*
* Return gem handle, or 0 on failure. Gem handles are never 0.
*/
uint32_t
anv_gem_create(struct anv_device *device, uint64_t size)
{
struct drm_i915_gem_create gem_create = {
.size = size,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
if (ret != 0) {
/* FIXME: What do we do if this fails? */
return 0;
}
return gem_create.handle;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
struct drm_gem_close close = {
.handle = gem_handle,
};
intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
/* Check for invalid flags */
assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
struct drm_i915_gem_create_ext_memory_regions ext_regions = {
.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
.num_regions = num_regions,
.regions = (uintptr_t)regions,
};
struct drm_i915_gem_create_ext gem_create = {
.size = anv_bo_size,
.extensions = (uintptr_t) &ext_regions,
.flags = flags,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,
&gem_create);
if (ret != 0) {
return 0;
}
return gem_create.handle;
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
*/
static void*
anv_gem_mmap_offset(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
struct drm_i915_gem_mmap_offset gem_mmap = {
.handle = gem_handle,
.flags = device->info->has_local_mem ? I915_MMAP_OFFSET_FIXED :
(flags & I915_MMAP_WC) ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
};
assert(offset == 0);
/* Get the fake offset back */
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_mmap);
if (ret != 0)
return MAP_FAILED;
/* And map it */
void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
device->fd, gem_mmap.offset);
return map;
}
static void*
anv_gem_mmap_legacy(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
assert(!device->info->has_local_mem);
struct drm_i915_gem_mmap gem_mmap = {
.handle = gem_handle,
.offset = offset,
.size = size,
.flags = flags,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
if (ret != 0)
return MAP_FAILED;
return (void *)(uintptr_t) gem_mmap.addr_ptr;
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_MMAP. Returns MAP_FAILED on error.
*/
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
void *map;
if (device->physical->has_mmap_offset)
map = anv_gem_mmap_offset(device, gem_handle, offset, size, flags);
else
map = anv_gem_mmap_legacy(device, gem_handle, offset, size, flags);
if (map != MAP_FAILED)
VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1));
return map;
}
/* This is just a wrapper around munmap, but it also notifies valgrind that
* this map is no longer valid. Pair this with anv_gem_mmap().
*/
void
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
{
VG(VALGRIND_FREELIKE_BLOCK(p, 0));
munmap(p, size);
}
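/* Illustrative sketch, not part of the original change: typical lifetime of a
 * GEM allocation using the wrappers above. The example function name is
 * hypothetical and error handling is kept minimal.
 */
static void UNUSED
anv_gem_example_roundtrip(struct anv_device *device)
{
   uint32_t handle = anv_gem_create(device, 4096);
   if (handle == 0)
      return; /* allocation failed; valid handles are never 0 */

   void *map = anv_gem_mmap(device, handle, 0, 4096, 0 /* flags */);
   if (map != MAP_FAILED) {
      memset(map, 0, 4096);              /* CPU writes through the mapping */
      anv_gem_munmap(device, map, 4096); /* pairs with anv_gem_mmap() */
   }

   anv_gem_close(device, handle);
}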
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
struct drm_i915_gem_userptr userptr = {
.user_ptr = (__u64)((unsigned long) mem),
.user_size = size,
.flags = 0,
};
if (device->physical->has_userptr_probe)
userptr.flags |= I915_USERPTR_PROBE;
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
if (ret == -1)
return 0;
return userptr.handle;
}
int
anv_gem_set_caching(struct anv_device *device,
uint32_t gem_handle, uint32_t caching)
{
struct drm_i915_gem_caching gem_caching = {
.handle = gem_handle,
.caching = caching,
};
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching);
}
/**
* On error, \a timeout_ns holds the remaining time.
*/
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
struct drm_i915_gem_wait wait = {
.bo_handle = gem_handle,
.timeout_ns = *timeout_ns,
.flags = 0,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
*timeout_ns = wait.timeout_ns;
return ret;
}
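/* Illustrative sketch, not part of the original change: a bounded wait on a
 * BO using the wrapper above; on failure the remaining budget is left in
 * 'timeout'. The example function name is hypothetical.
 */
static bool UNUSED
anv_gem_example_wait_1ms(struct anv_device *device, uint32_t gem_handle)
{
   int64_t timeout = 1000000; /* 1 ms in nanoseconds */
   if (anv_gem_wait(device, gem_handle, &timeout) != 0) {
      /* Still busy or an error occurred; 'timeout' holds what is left of the
       * 1 ms budget. */
      return false;
   }
   return true; /* the BO is idle */
}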
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
if (execbuf->flags & I915_EXEC_FENCE_OUT)
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
else
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}
/** Return -1 on error. */
int
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
{
if (!device->info->has_tiling_uapi)
return -1;
struct drm_i915_gem_get_tiling get_tiling = {
.handle = gem_handle,
};
/* FIXME: On discrete platforms we don't have DRM_IOCTL_I915_GEM_GET_TILING
* anymore, so we will need another way to get the tiling. Apparently this
* is only used in Android code, so we may need some other way to
* communicate the tiling mode.
*/
if (intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) {
assert(!"Failed to get BO tiling");
return -1;
}
return get_tiling.tiling_mode;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
int ret;
/* On discrete platforms we don't have DRM_IOCTL_I915_GEM_SET_TILING. So
* nothing needs to be done.
*/
if (!device->info->has_tiling_uapi)
return 0;
/* set_tiling overwrites the input on the error path, so we have to open
* code intel_ioctl.
*/
do {
struct drm_i915_gem_set_tiling set_tiling = {
.handle = gem_handle,
.tiling_mode = tiling,
.stride = stride,
};
ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
return ret;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
int tmp;
drm_i915_getparam_t gp = {
.param = param,
.value = &tmp,
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
if (ret == 0)
return tmp;
return 0;
}
bool
anv_gem_has_context_priority(int fd, int priority)
{
return !anv_gem_set_context_param(fd, 0, I915_CONTEXT_PARAM_PRIORITY,
priority);
}
int
anv_gem_create_context(struct anv_device *device)
{
struct drm_i915_gem_context_create create = { 0 };
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
if (ret == -1)
return -1;
return create.ctx_id;
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
struct drm_i915_gem_context_destroy destroy = {
.ctx_id = context,
};
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
}
int
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
{
struct drm_i915_gem_context_param p = {
.ctx_id = context,
.param = param,
.value = value,
};
int err = 0;
if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
err = -errno;
return err;
}
int
anv_gem_context_get_reset_stats(int fd, int context,
uint32_t *active, uint32_t *pending)
{
struct drm_i915_reset_stats stats = {
.ctx_id = context,
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
if (ret == 0) {
*active = stats.batch_active;
*pending = stats.batch_pending;
}
return ret;
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
struct drm_prime_handle args = {
.handle = gem_handle,
.flags = DRM_CLOEXEC | DRM_RDWR,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
return args.fd;
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
struct drm_prime_handle args = {
.fd = fd,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
if (ret == -1)
return 0;
return args.handle;
}
int
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
{
struct drm_i915_reg_read args = {
.offset = offset
};
int ret = intel_ioctl(fd, DRM_IOCTL_I915_REG_READ, &args);
*result = args.val;
return ret;
}
struct drm_i915_query_engine_info *
anv_gem_get_engine_info(int fd)
{
return intel_i915_query_alloc(fd, DRM_I915_QUERY_ENGINE_INFO, NULL);
}


@@ -0,0 +1,187 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/mman.h>
#include <sys/syscall.h>
#include "util/anon_file.h"
#include "anv_private.h"
uint32_t
anv_gem_create(struct anv_device *device, uint64_t size)
{
int fd = os_create_anonymous_file(size, "fake bo");
if (fd == -1)
return 0;
assert(fd != 0);
return fd;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
close(gem_handle);
}
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
return 0;
}
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
/* Ignore flags, as they're specific to I915_GEM_MMAP. */
(void) flags;
return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
gem_handle, offset);
}
/* This is just a wrapper around munmap. The stub mapping comes straight from
 * mmap(), so there is nothing else to tear down. Pair this with anv_gem_mmap().
 */
void
anv_gem_munmap(struct anv_device *device, void *p, uint64_t size)
{
munmap(p, size);
}
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
int fd = os_create_anonymous_file(size, "fake bo");
if (fd == -1)
return 0;
assert(fd != 0);
return fd;
}
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
return 0;
}
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
return 0;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
return 0;
}
int
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
{
return 0;
}
int
anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
uint32_t caching)
{
return 0;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
unreachable("Unused");
}
int
anv_gem_create_context(struct anv_device *device)
{
unreachable("Unused");
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
unreachable("Unused");
}
int
anv_gem_set_context_param(int fd, int context, uint32_t param, uint64_t value)
{
unreachable("Unused");
}
bool
anv_gem_has_context_priority(int fd, int priority)
{
unreachable("Unused");
}
int
anv_gem_context_get_reset_stats(int fd, int context,
uint32_t *active, uint32_t *pending)
{
unreachable("Unused");
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
unreachable("Unused");
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
unreachable("Unused");
}
int
anv_i915_query(int fd, uint64_t query_id, void *buffer,
int32_t *buffer_len)
{
unreachable("Unused");
}
struct drm_i915_query_engine_info *
anv_gem_get_engine_info(int fd)
{
unreachable("Unused");
}
int
anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result)
{
unreachable("Unused");
}

View File

@ -0,0 +1,180 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/*
* NOTE: The header can be included multiple times, from the same file.
*/
/*
* Gen-specific function declarations. This header must *not* be included
* directly. Instead, it is included multiple times by anv_private.h.
*
* In this header file, the usual genx() macro is available.
*/
#ifndef ANV_PRIVATE_H
#error This file is included by means other than anv_private.h
#endif
struct intel_sample_positions;
typedef struct VkRenderingSelfDependencyInfoMESA VkRenderingSelfDependencyInfoMESA;
extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[];
extern const uint32_t genX(vk_to_intel_primitive_type)[];
extern const uint32_t genX(vk_to_intel_compare_op)[];
extern const uint32_t genX(vk_to_intel_stencil_op)[];
extern const uint32_t genX(vk_to_intel_logic_op)[];
void genX(init_physical_device_state)(struct anv_physical_device *device);
VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
const struct isl_surf *surf);
void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
int vb_index,
struct anv_address vb_address,
uint32_t vb_size);
void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
uint32_t access_type,
uint64_t vb_used);
void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
unsigned width, unsigned height,
unsigned scale);
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
struct anv_device *device,
uint32_t current_pipeline,
enum anv_pipe_bits bits);
void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_device *device,
struct anv_batch *batch);
void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state);
void genX(emit_so_memcpy)(struct anv_memcpy_state *state,
struct anv_address dst, struct anv_address src,
uint32_t size);
void genX(emit_l3_config)(struct anv_batch *batch,
const struct anv_device *device,
const struct intel_l3_config *cfg);
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_l3_config *cfg);
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
bool enable);
void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageAspectFlagBits aspect,
enum isl_aux_usage aux_usage,
uint32_t level,
uint32_t base_layer,
uint32_t layer_count);
void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
struct anv_state genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
enum intel_urb_deref_block_size *deref_block_size);
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const struct vk_sample_locations_state *sl);
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct vk_sample_locations_state *sl);
void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
const struct vk_fragment_shading_rate_state *fsr);
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address addr,
bool end_of_pipe);
void
genX(rasterization_mode)(VkPolygonMode raster_mode,
VkLineRasterizationModeEXT line_mode,
float line_width,
uint32_t *api_mode,
bool *msaa_rasterization_enable);
uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
VkPolygonMode raster_mode);
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
VkPrimitiveTopology primitive_topology);
void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state);
void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);
void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);

File diff suppressed because it is too large

View File

@ -0,0 +1,516 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "anv_measure.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "common/intel_measure.h"
#include "util/debug.h"
struct anv_measure_batch {
struct anv_bo *bo;
struct intel_measure_batch base;
};
void
anv_measure_device_init(struct anv_physical_device *device)
{
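/* verx10 is 10 * major + minor, e.g. 75 is Haswell (gfx7.5), 125 is gfx12.5. */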
switch (device->info.verx10) {
case 125:
device->cmd_emit_timestamp = &gfx125_cmd_emit_timestamp;
break;
case 120:
device->cmd_emit_timestamp = &gfx12_cmd_emit_timestamp;
break;
case 110:
device->cmd_emit_timestamp = &gfx11_cmd_emit_timestamp;
break;
case 90:
device->cmd_emit_timestamp = &gfx9_cmd_emit_timestamp;
break;
case 80:
device->cmd_emit_timestamp = &gfx8_cmd_emit_timestamp;
break;
case 75:
device->cmd_emit_timestamp = &gfx75_cmd_emit_timestamp;
break;
case 70:
device->cmd_emit_timestamp = &gfx7_cmd_emit_timestamp;
break;
default:
assert(false);
}
/* initialise list of measure structures that await rendering */
struct intel_measure_device *measure_device = &device->measure_device;
intel_measure_init(measure_device);
struct intel_measure_config *config = measure_device->config;
if (config == NULL)
return;
/* the final member of intel_measure_ringbuffer is a zero-length array of
* intel_measure_buffered_result objects. Allocate additional space for
* the buffered objects based on the run-time configurable buffer_size
*/
const size_t rb_bytes = sizeof(struct intel_measure_ringbuffer) +
config->buffer_size * sizeof(struct intel_measure_buffered_result);
struct intel_measure_ringbuffer * rb =
vk_zalloc(&device->instance->vk.alloc,
rb_bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
measure_device->ringbuffer = rb;
}
static struct intel_measure_config*
config_from_command_buffer(struct anv_cmd_buffer *cmd_buffer)
{
return cmd_buffer->device->physical->measure_device.config;
}
void
anv_measure_init(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_device *device = cmd_buffer->device;
if (!config || !config->enabled) {
cmd_buffer->measure = NULL;
return;
}
/* the final member of anv_measure is a zero-length array of
* intel_measure_snapshot objects. Create additional space for the
* snapshot objects based on the run-time configurable batch_size
*/
const size_t batch_bytes = sizeof(struct anv_measure_batch) +
config->batch_size * sizeof(struct intel_measure_snapshot);
struct anv_measure_batch * measure =
vk_alloc(&cmd_buffer->vk.pool->alloc,
batch_bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
memset(measure, 0, batch_bytes);
ASSERTED VkResult result =
anv_device_alloc_bo(device, "measure data",
config->batch_size * sizeof(uint64_t),
ANV_BO_ALLOC_MAPPED,
0,
(struct anv_bo**)&measure->bo);
measure->base.timestamps = measure->bo->map;
assert(result == VK_SUCCESS);
cmd_buffer->measure = measure;
}
static void
anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count)
{
struct anv_batch *batch = &cmd_buffer->batch;
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_physical_device *device = cmd_buffer->device->physical;
struct intel_measure_device *measure_device = &device->measure_device;
const unsigned device_frame = measure_device->frame;
/* if the command buffer is not associated with a frame, associate it with
* the most recently acquired frame
*/
if (measure->base.frame == 0)
measure->base.frame = device_frame;
// uintptr_t framebuffer = (uintptr_t)cmd_buffer->state.framebuffer;
//
// if (!measure->base.framebuffer &&
// cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
// /* secondary command buffer inherited the framebuffer from the primary */
// measure->base.framebuffer = framebuffer;
//
// /* verify framebuffer has been properly tracked */
// assert(type == INTEL_SNAPSHOT_END ||
// framebuffer == measure->base.framebuffer ||
// framebuffer == 0 ); /* compute has no framebuffer */
unsigned index = measure->base.index++;
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
(struct anv_address) {
.bo = measure->bo,
.offset = index * sizeof(uint64_t) },
true /* end_of_pipe */);
if (event_name == NULL)
event_name = intel_measure_snapshot_string(type);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
memset(snapshot, 0, sizeof(*snapshot));
snapshot->type = type;
snapshot->count = (unsigned) count;
snapshot->event_count = measure->base.event_count;
snapshot->event_name = event_name;
// snapshot->framebuffer = framebuffer;
if (type == INTEL_SNAPSHOT_COMPUTE && cmd_buffer->state.compute.pipeline) {
snapshot->cs = (uintptr_t) cmd_buffer->state.compute.pipeline->cs;
} else if (cmd_buffer->state.gfx.pipeline) {
const struct anv_graphics_pipeline *pipeline =
cmd_buffer->state.gfx.pipeline;
snapshot->vs = (uintptr_t) pipeline->shaders[MESA_SHADER_VERTEX];
snapshot->tcs = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_CTRL];
snapshot->tes = (uintptr_t) pipeline->shaders[MESA_SHADER_TESS_EVAL];
snapshot->gs = (uintptr_t) pipeline->shaders[MESA_SHADER_GEOMETRY];
snapshot->fs = (uintptr_t) pipeline->shaders[MESA_SHADER_FRAGMENT];
}
}
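/* Timestamps are written in start/end pairs: a start snapshot takes an even
 * index and the matching end snapshot the following odd one, which is what
 * the index % 2 checks in this file rely on.
 */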
static void
anv_measure_end_snapshot(struct anv_cmd_buffer *cmd_buffer,
uint32_t event_count)
{
struct anv_batch *batch = &cmd_buffer->batch;
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_physical_device *device = cmd_buffer->device->physical;
unsigned index = measure->base.index++;
assert(index % 2 == 1);
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
(struct anv_address) {
.bo = measure->bo,
.offset = index * sizeof(uint64_t) },
true /* end_of_pipe */);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
memset(snapshot, 0, sizeof(*snapshot));
snapshot->type = INTEL_SNAPSHOT_END;
snapshot->event_count = event_count;
}
static bool
state_changed(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type)
{
uintptr_t vs=0, tcs=0, tes=0, gs=0, fs=0, cs=0;
if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)
/* can't record timestamps in this mode */
return false;
if (type == INTEL_SNAPSHOT_COMPUTE) {
const struct anv_compute_pipeline *cs_pipe =
cmd_buffer->state.compute.pipeline;
assert(cs_pipe);
cs = (uintptr_t)cs_pipe->cs;
} else if (type == INTEL_SNAPSHOT_DRAW) {
const struct anv_graphics_pipeline *gfx = cmd_buffer->state.gfx.pipeline;
assert(gfx);
vs = (uintptr_t) gfx->shaders[MESA_SHADER_VERTEX];
tcs = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_CTRL];
tes = (uintptr_t) gfx->shaders[MESA_SHADER_TESS_EVAL];
gs = (uintptr_t) gfx->shaders[MESA_SHADER_GEOMETRY];
fs = (uintptr_t) gfx->shaders[MESA_SHADER_FRAGMENT];
}
/* else blorp, all programs NULL */
return intel_measure_state_changed(&cmd_buffer->measure->base,
vs, tcs, tes, gs, fs, cs);
}
void
_anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
assert(config);
if (measure == NULL)
return;
assert(type != INTEL_SNAPSHOT_END);
if (!state_changed(cmd_buffer, type)) {
/* filter out this event */
return;
}
/* increment event count */
++measure->base.event_count;
if (measure->base.event_count == 1 ||
measure->base.event_count == config->event_interval + 1) {
/* the first event of an interval */
if (measure->base.index % 2) {
/* end the previous event */
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count - 1);
}
measure->base.event_count = 1;
if (measure->base.index == config->batch_size) {
/* Snapshot buffer is full. The batch must be flushed before
* additional snapshots can be taken.
*/
static bool warned = false;
if (unlikely(!warned)) {
fprintf(config->file,
"WARNING: batch size exceeds INTEL_MEASURE limit: %d. "
"Data has been dropped. "
"Increase setting with INTEL_MEASURE=batch_size={count}\n",
config->batch_size);
}
warned = true;
return;
}
anv_measure_start_snapshot(cmd_buffer, type, event_name, count);
}
}
/**
* Called when a command buffer is reset. Re-initializes existing anv_measure
* data structures.
*/
void
anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_device *device = cmd_buffer->device;
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (!config->enabled) {
cmd_buffer->measure = NULL;
return;
}
if (!measure) {
/* Capture has recently been enabled. Instead of resetting, a new data
* structure must be allocated and initialized.
*/
return anv_measure_init(cmd_buffer);
}
/* it is possible that the command buffer contains snapshots that have not
* yet been processed
*/
intel_measure_gather(&device->physical->measure_device,
device->info);
assert(cmd_buffer->device != NULL);
measure->base.index = 0;
// measure->base.framebuffer = 0;
measure->base.frame = 0;
measure->base.event_count = 0;
list_inithead(&measure->base.link);
}
void
anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
struct anv_device *device = cmd_buffer->device;
struct anv_physical_device *physical = device->physical;
if (!config)
return;
if (measure == NULL)
return;
/* it is possible that the command buffer contains snapshots that have not
* yet been processed
*/
intel_measure_gather(&physical->measure_device, &physical->info);
anv_device_release_bo(device, measure->bo);
vk_free(&cmd_buffer->vk.pool->alloc, measure);
cmd_buffer->measure = NULL;
}
static struct intel_measure_config*
config_from_device(struct anv_device *device)
{
return device->physical->measure_device.config;
}
void
anv_measure_device_destroy(struct anv_physical_device *device)
{
struct intel_measure_device *measure_device = &device->measure_device;
struct intel_measure_config *config = measure_device->config;
if (!config)
return;
if (measure_device->ringbuffer != NULL) {
vk_free(&device->instance->vk.alloc, measure_device->ringbuffer);
measure_device->ringbuffer = NULL;
}
}
/**
* Hook for command buffer submission.
*/
void
_anv_measure_submit(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
struct intel_measure_device *measure_device = &cmd_buffer->device->physical->measure_device;
if (!config)
return;
if (measure == NULL)
return;
struct intel_measure_batch *base = &measure->base;
if (base->index == 0)
/* no snapshots were started */
return;
/* finalize snapshots and enqueue them */
static unsigned cmd_buffer_count = 0;
base->batch_count = p_atomic_inc_return(&cmd_buffer_count);
if (base->index % 2 == 1) {
anv_measure_end_snapshot(cmd_buffer, base->event_count);
base->event_count = 0;
}
/* Mark the final timestamp as 'not completed'. This marker will be used
* to verify that rendering is complete.
*/
base->timestamps[base->index - 1] = 0;
/* add to the list of submitted snapshots */
pthread_mutex_lock(&measure_device->mutex);
list_addtail(&measure->base.link, &measure_device->queued_snapshots);
pthread_mutex_unlock(&measure_device->mutex);
}
/**
* Hook for the start of a frame.
*/
void
_anv_measure_acquire(struct anv_device *device)
{
struct intel_measure_config *config = config_from_device(device);
struct intel_measure_device *measure_device = &device->physical->measure_device;
if (!config)
return;
if (measure_device == NULL)
return;
intel_measure_frame_transition(p_atomic_inc_return(&measure_device->frame));
/* iterate the queued snapshots and publish those that finished */
intel_measure_gather(measure_device, &device->physical->info);
}
void
_anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (measure == NULL)
return;
if (measure->base.index % 2 == 0)
return;
anv_measure_end_snapshot(cmd_buffer, measure->base.event_count);
measure->base.event_count = 0;
}
void
_anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer)
{
struct intel_measure_config *config = config_from_command_buffer(cmd_buffer);
struct anv_measure_batch *measure = cmd_buffer->measure;
if (!config)
return;
if (measure == NULL)
return;
// if (measure->base.framebuffer == (uintptr_t) cmd_buffer->state.framebuffer)
// /* no change */
// return;
bool filtering = (config->flags & (INTEL_MEASURE_RENDERPASS |
INTEL_MEASURE_SHADER));
if (filtering && measure->base.index % 2 == 1) {
/* snapshot for previous renderpass was not ended */
anv_measure_end_snapshot(cmd_buffer,
measure->base.event_count);
measure->base.event_count = 0;
}
// measure->base.framebuffer = (uintptr_t) cmd_buffer->state.framebuffer;
}
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary)
{
struct intel_measure_config *config = config_from_command_buffer(primary);
struct anv_measure_batch *measure = primary->measure;
if (!config)
return;
if (measure == NULL)
return;
if (config->flags & (INTEL_MEASURE_BATCH | INTEL_MEASURE_FRAME))
/* secondary timing will be contained within the primary */
return;
if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
static bool warned = false;
if (unlikely(!warned)) {
fprintf(config->file,
"WARNING: INTEL_MEASURE cannot capture timings of commands "
"in secondary command buffers with "
"VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT set.\n");
}
return;
}
if (measure->base.index % 2 == 1)
anv_measure_end_snapshot(primary, measure->base.event_count);
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[measure->base.index]);
_anv_measure_snapshot(primary, INTEL_SNAPSHOT_SECONDARY_BATCH, NULL, 0);
snapshot->secondary = &secondary->measure->base;
}

View File

@ -0,0 +1,82 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ANV_MEASURE_H
#define ANV_MEASURE_H
#include "anv_private.h"
#include "common/intel_measure.h"
void anv_measure_device_init(struct anv_physical_device *device);
void anv_measure_device_destroy(struct anv_physical_device *device);
void anv_measure_init(struct anv_cmd_buffer *cmd_buffer);
void anv_measure_destroy(struct anv_cmd_buffer *cmd_buffer);
void anv_measure_reset(struct anv_cmd_buffer *cmd_buffer);
void _anv_measure_snapshot(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type,
const char *event_name,
uint32_t count);
/* ends snapshots before command buffer submission */
void _anv_measure_endcommandbuffer(struct anv_cmd_buffer *cmd_buffer);
/* when measuring render passes, inserts a timestamp */
void _anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer);
/* tracks frame progression */
void _anv_measure_acquire(struct anv_device *device);
/* called at submission; finalizes any open snapshot and queues the batch
 * (pair with endcommandbuffer) */
void _anv_measure_submit(struct anv_cmd_buffer *cmd_buffer);
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary);
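/* These wrappers make the hooks nearly free when INTEL_MEASURE is disabled:
 * the _anv_measure_*() functions are only called when a measurement config or
 * measure struct is actually present.
 */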
#define anv_measure_acquire(device) \
if (unlikely(device->physical->measure_device.config)) \
_anv_measure_acquire(device)
#define anv_measure_snapshot(cmd_buffer, type, event_name, count) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_snapshot(cmd_buffer, type, event_name, count)
#define anv_measure_endcommandbuffer(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_endcommandbuffer(cmd_buffer)
#define anv_measure_beginrenderpass(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_beginrenderpass(cmd_buffer)
#define anv_measure_submit(cmd_buffer) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_submit(cmd_buffer)
#define anv_measure_add_secondary(primary, secondary) \
if (unlikely(primary->measure)) \
_anv_measure_add_secondary(primary, secondary)
#endif /* ANV_MEASURE_H */

View File

@ -0,0 +1,97 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef ANV_NIR_H
#define ANV_NIR_H
#include "nir/nir.h"
#include "anv_private.h"
#ifdef __cplusplus
extern "C" {
#endif
bool anv_check_for_primitive_replication(struct anv_device *device,
VkShaderStageFlags stages,
nir_shader **shaders,
uint32_t view_mask);
bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
bool use_primitive_replication);
bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
const struct anv_pipeline_layout *layout);
static inline nir_address_format
anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
bool robust_buffer_access)
{
if (pdevice->has_a64_buffer_access) {
if (robust_buffer_access)
return nir_address_format_64bit_bounded_global;
else
return nir_address_format_64bit_global_32bit_offset;
} else {
return nir_address_format_32bit_index_offset;
}
}
static inline nir_address_format
anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
bool robust_buffer_access)
{
if (pdevice->has_a64_buffer_access) {
if (robust_buffer_access)
return nir_address_format_64bit_bounded_global;
else
return nir_address_format_64bit_global_32bit_offset;
} else {
return nir_address_format_32bit_index_offset;
}
}
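/* Note: the UBO and SSBO helpers above currently select identical address
 * formats; they are simply split per descriptor type.
 */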
bool anv_nir_lower_ubo_loads(nir_shader *shader);
void anv_nir_apply_pipeline_layout(nir_shader *shader,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
const struct anv_pipeline_layout *layout,
struct anv_pipeline_bind_map *map);
void anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx);
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
bool anv_nir_add_base_work_group_id(nir_shader *shader);
#ifdef __cplusplus
}
#endif
#endif /* ANV_NIR_H */

View File

@ -0,0 +1,63 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "compiler/brw_compiler.h"
static bool
anv_nir_add_base_work_group_id_instr(nir_builder *b,
nir_instr *instr,
UNUSED void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *load_id = nir_instr_as_intrinsic(instr);
if (load_id->intrinsic != nir_intrinsic_load_workgroup_id)
return false;
b->cursor = nir_after_instr(&load_id->instr);
nir_ssa_def *load_base =
nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
.base = offsetof(struct anv_push_constants, cs.base_work_group_id),
.range = 3 * sizeof(uint32_t));
nir_ssa_def *id = nir_iadd(b, &load_id->dest.ssa, load_base);
nir_ssa_def_rewrite_uses_after(&load_id->dest.ssa, id, id->parent_instr);
return true;
}
bool
anv_nir_add_base_work_group_id(nir_shader *shader)
{
assert(shader->info.stage == MESA_SHADER_COMPUTE);
return nir_shader_instructions_pass(shader,
anv_nir_add_base_work_group_id_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,290 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"
#define sizeof_field(type, field) sizeof(((type *)0)->field)
void
anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx)
{
const struct brw_compiler *compiler = pdevice->compiler;
const struct intel_device_info *devinfo = compiler->devinfo;
memset(map->push_ranges, 0, sizeof(map->push_ranges));
bool has_const_ubo = false;
unsigned push_start = UINT_MAX, push_end = 0;
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_ubo:
if (nir_src_is_const(intrin->src[0]) &&
nir_src_is_const(intrin->src[1]))
has_const_ubo = true;
break;
case nir_intrinsic_load_push_constant: {
unsigned base = nir_intrinsic_base(intrin);
unsigned range = nir_intrinsic_range(intrin);
push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base + range);
break;
}
case nir_intrinsic_load_desc_set_address_intel:
push_start = MIN2(push_start,
offsetof(struct anv_push_constants, desc_sets));
push_end = MAX2(push_end, push_start +
sizeof_field(struct anv_push_constants, desc_sets));
break;
default:
break;
}
}
}
}
const bool has_push_intrinsic = push_start <= push_end;
const bool push_ubo_ranges =
pdevice->info.verx10 >= 75 &&
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
!brw_shader_stage_requires_bindless_resources(nir->info.stage);
if (push_ubo_ranges && robust_buffer_access) {
/* We can't on-the-fly adjust our push ranges because doing so would
* mess up the layout in the shader. When robustBufferAccess is
* enabled, we push a mask into the shader indicating which pushed
* registers are valid and we zero out the invalid ones at the top of
* the shader.
*/
const uint32_t push_reg_mask_start =
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
const uint32_t push_reg_mask_end = push_reg_mask_start + sizeof(uint64_t);
push_start = MIN2(push_start, push_reg_mask_start);
push_end = MAX2(push_end, push_reg_mask_end);
}
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
* constant slot. Yes, there is an off-by-one error here but that's
* because the back-end will add it so we want to claim the number of
* push constants one dword less than the full amount including
* gl_SubgroupId.
*/
assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
}
/* Align push_start down to a 32B boundary and make it no larger than
* push_end (no push constants is indicated by push_start = UINT_MAX).
*/
push_start = MIN2(push_start, push_end);
push_start = align_down_u32(push_start, 32);
/* For vec4 our push data size needs to be aligned to a vec4 and for
* scalar, it needs to be aligned to a DWORD.
*/
const unsigned align = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
nir->num_uniforms = ALIGN(push_end - push_start, align);
prog_data->nr_params = nir->num_uniforms / 4;
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
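/* Push ranges are expressed in 32-byte (one GRF) units. */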
struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
.start = push_start / 32,
.length = DIV_ROUND_UP(push_end - push_start, 32),
};
if (has_push_intrinsic) {
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_builder build, *b = &build;
nir_builder_init(b, function->impl);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_push_constant: {
/* With bindless shaders we load uniforms with SEND
* messages. All the push constants are located after the
* RT_DISPATCH_GLOBALS. We just need to add the offset to
* the address right after RT_DISPATCH_GLOBALS (see
* brw_nir_lower_rt_intrinsics.c).
*/
unsigned base_offset =
brw_shader_stage_requires_bindless_resources(nir->info.stage) ? 0 : push_start;
intrin->intrinsic = nir_intrinsic_load_uniform;
nir_intrinsic_set_base(intrin,
nir_intrinsic_base(intrin) -
base_offset);
break;
}
case nir_intrinsic_load_desc_set_address_intel: {
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
.base = offsetof(struct anv_push_constants, desc_sets),
.range = sizeof_field(struct anv_push_constants, desc_sets),
.dest_type = nir_type_uint64);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
break;
}
default:
break;
}
}
}
}
}
if (push_ubo_ranges) {
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
/* The vec4 back-end pushes at most 32 regs while the scalar back-end
* pushes up to 64. This is primarily because the scalar back-end has a
* massively more competent register allocator and so the risk of
* spilling due to UBO pushing isn't nearly as high.
*/
const unsigned max_push_regs =
compiler->scalar_stage[nir->info.stage] ? 64 : 32;
unsigned total_push_regs = push_constant_range.length;
for (unsigned i = 0; i < 4; i++) {
if (total_push_regs + prog_data->ubo_ranges[i].length > max_push_regs)
prog_data->ubo_ranges[i].length = max_push_regs - total_push_regs;
total_push_regs += prog_data->ubo_ranges[i].length;
}
assert(total_push_regs <= max_push_regs);
int n = 0;
if (push_constant_range.length > 0)
map->push_ranges[n++] = push_constant_range;
if (robust_buffer_access) {
const uint32_t push_reg_mask_offset =
offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
assert(push_reg_mask_offset >= push_start);
prog_data->push_reg_mask_param =
(push_reg_mask_offset - push_start) / 4;
}
unsigned range_start_reg = push_constant_range.length;
for (int i = 0; i < 4; i++) {
struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
if (ubo_range->length == 0)
continue;
if (n >= 4 || (n == 3 && compiler->constant_buffer_0_is_relative)) {
memset(ubo_range, 0, sizeof(*ubo_range));
continue;
}
const struct anv_pipeline_binding *binding =
&map->surface_to_descriptor[ubo_range->block];
map->push_ranges[n++] = (struct anv_push_range) {
.set = binding->set,
.index = binding->index,
.dynamic_offset_index = binding->dynamic_offset_index,
.start = ubo_range->start,
.length = ubo_range->length,
};
/* We only bother to shader-zero pushed client UBOs */
if (binding->set < MAX_SETS && robust_buffer_access) {
prog_data->zero_push_reg |= BITFIELD64_RANGE(range_start_reg,
ubo_range->length);
}
range_start_reg += ubo_range->length;
}
} else {
/* For Ivy Bridge, the push constants packets have a different
* rule that would require us to iterate in the other direction
* and possibly mess around with dynamic state base address.
* Don't bother; just emit regular push constants at n = 0.
*
* In the compute case, we don't have multiple push ranges so it's
* better to just provide one in push_ranges[0].
*/
map->push_ranges[0] = push_constant_range;
}
/* Now that we're done computing the push constant portion of the
* bind map, hash it. This lets us quickly determine if the actual
* mapping has changed and not just a no-op pipeline change.
*/
_mesa_sha1_compute(map->push_ranges,
sizeof(map->push_ranges),
map->push_sha1);
}
void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
for (unsigned i = 0; i < 4; i++)
prog_data_push_size += prog_data->ubo_ranges[i].length;
unsigned bind_map_push_size = 0;
for (unsigned i = 0; i < 4; i++)
bind_map_push_size += map->push_ranges[i].length;
/* We could go through everything again but it should be enough to assert
* that they push the same number of registers. This should alert us if
* the back-end compiler decides to re-arrange stuff or shrink a range.
*/
assert(prog_data_push_size == bind_map_push_size);
#endif
}

View File

@ -0,0 +1,324 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "util/debug.h"
/**
* This file implements the lowering required for VK_KHR_multiview.
*
* When possible, Primitive Replication is used and the shader is modified to
* make gl_Position an array and fill it with values for each view.
*
* Otherwise we implement multiview using instanced rendering. The number of
* instances in each draw call is multiplied by the number of views in the
* subpass. Then, in the shader, we divide gl_InstanceId by the number of
* views and use gl_InstanceId % view_count to compute the actual ViewIndex.
*/
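/* For example, with two views an N-instance draw is emitted with 2*N hardware
 * instances; hardware instance i runs application instance i / 2 for compacted
 * view i % 2, remapped through the view mask when it is sparse.
 */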
struct lower_multiview_state {
nir_builder builder;
uint32_t view_mask;
nir_ssa_def *instance_id;
nir_ssa_def *view_index;
};
static nir_ssa_def *
build_instance_id(struct lower_multiview_state *state)
{
assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);
if (state->instance_id == NULL) {
nir_builder *b = &state->builder;
b->cursor = nir_before_block(nir_start_block(b->impl));
/* We use instancing for implementing multiview. The actual instance id
* is given by dividing instance_id by the number of views in this
* subpass.
*/
state->instance_id =
nir_idiv(b, nir_load_instance_id(b),
nir_imm_int(b, util_bitcount(state->view_mask)));
}
return state->instance_id;
}
static nir_ssa_def *
build_view_index(struct lower_multiview_state *state)
{
assert(state->builder.shader->info.stage != MESA_SHADER_FRAGMENT);
if (state->view_index == NULL) {
nir_builder *b = &state->builder;
b->cursor = nir_before_block(nir_start_block(b->impl));
assert(state->view_mask != 0);
if (util_bitcount(state->view_mask) == 1) {
/* Set the view index directly. */
state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
} else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
/* We only support 16 viewports */
assert((state->view_mask & 0xffff0000) == 0);
/* We use instancing for implementing multiview. The compacted view
* id is given by instance_id % view_count. We then have to convert
* that to an actual view id.
*/
nir_ssa_def *compacted =
nir_umod(b, nir_load_instance_id(b),
nir_imm_int(b, util_bitcount(state->view_mask)));
if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
/* If we have a full view mask, then compacted is what we want */
state->view_index = compacted;
} else {
/* Now we define a map from compacted view index to the actual
* view index that's based on the view_mask. The map is given by
* 16 nibbles, each of which is a value from 0 to 15.
*/
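/* Example: view_mask = 0b1101 (views 0, 2 and 3) gives remap = 0x320, so
 * compacted index 1 selects view 2.
 */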
uint64_t remap = 0;
uint32_t i = 0;
u_foreach_bit(bit, state->view_mask) {
assert(bit < 16);
remap |= (uint64_t)bit << (i++ * 4);
}
nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));
/* One of these days, when we have int64 everywhere, this will be
* easier.
*/
nir_ssa_def *shifted;
if (remap <= UINT32_MAX) {
shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
} else {
nir_ssa_def *shifted_low =
nir_ushr(b, nir_imm_int(b, remap), shift);
nir_ssa_def *shifted_high =
nir_ushr(b, nir_imm_int(b, remap >> 32),
nir_isub(b, shift, nir_imm_int(b, 32)));
shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
shifted_low, shifted_high);
}
state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
}
} else {
const struct glsl_type *type = glsl_int_type();
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
b->shader->info.stage == MESA_SHADER_GEOMETRY)
type = glsl_array_type(type, 1, 0);
nir_variable *idx_var =
nir_variable_create(b->shader, nir_var_shader_in,
type, "view index");
idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
idx_var->data.interpolation = INTERP_MODE_FLAT;
nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
if (glsl_type_is_array(type))
deref = nir_build_deref_array_imm(b, deref, 0);
state->view_index = nir_load_deref(b, deref);
}
}
return state->view_index;
}
static bool
is_load_view_index(const nir_instr *instr, const void *data)
{
return instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_view_index;
}
static nir_ssa_def *
replace_load_view_index_with_zero(struct nir_builder *b,
nir_instr *instr, void *data)
{
assert(is_load_view_index(instr, data));
return nir_imm_zero(b, 1, 32);
}
static nir_ssa_def *
replace_load_view_index_with_layer_id(struct nir_builder *b,
nir_instr *instr, void *data)
{
assert(is_load_view_index(instr, data));
return nir_load_layer_id(b);
}
bool
anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
bool use_primitive_replication)
{
assert(shader->info.stage != MESA_SHADER_COMPUTE);
/* If multiview isn't enabled, just lower the ViewIndex builtin to zero. */
if (view_mask == 0) {
return nir_shader_lower_instructions(shader, is_load_view_index,
replace_load_view_index_with_zero, NULL);
}
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
return nir_shader_lower_instructions(shader, is_load_view_index,
replace_load_view_index_with_layer_id, NULL);
}
/* This pass assumes a single entrypoint */
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
/* Primitive Replication allows a shader to write different positions for
* each view in the same execution. If only the position depends on the
* view, then it is possible to use the feature instead of instancing to
* implement multiview.
*/
if (use_primitive_replication) {
bool progress = nir_lower_multiview(shader, view_mask);
if (progress) {
nir_builder b;
nir_builder_init(&b, entrypoint);
b.cursor = nir_before_cf_list(&entrypoint->body);
/* Fill Layer ID with zero. Replication will use that as base to
* apply the RTAI offsets.
*/
nir_variable *layer_id_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "layer ID");
layer_id_out->data.location = VARYING_SLOT_LAYER;
nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);
}
return progress;
}
struct lower_multiview_state state = {
.view_mask = view_mask,
};
nir_builder_init(&state.builder, entrypoint);
nir_foreach_block(block, entrypoint) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
if (load->intrinsic != nir_intrinsic_load_instance_id &&
load->intrinsic != nir_intrinsic_load_view_index)
continue;
assert(load->dest.is_ssa);
nir_ssa_def *value;
if (load->intrinsic == nir_intrinsic_load_instance_id) {
value = build_instance_id(&state);
} else {
assert(load->intrinsic == nir_intrinsic_load_view_index);
value = build_view_index(&state);
}
nir_ssa_def_rewrite_uses(&load->dest.ssa, value);
nir_instr_remove(&load->instr);
}
}
/* The view index is available in all stages but the instance id is only
* available in the VS. If it's not a fragment shader, we need to pass
* the view index on to the next stage.
*/
nir_ssa_def *view_index = build_view_index(&state);
nir_builder *b = &state.builder;
assert(view_index->parent_instr->block == nir_start_block(entrypoint));
b->cursor = nir_after_instr(view_index->parent_instr);
/* Unless there is only one possible view index (that would be set
* directly), pass it to the next stage. */
if (util_bitcount(state.view_mask) != 1) {
nir_variable *view_index_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "view index");
view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
nir_store_var(b, view_index_out, view_index, 0x1);
}
nir_variable *layer_id_out =
nir_variable_create(shader, nir_var_shader_out,
glsl_int_type(), "layer ID");
layer_id_out->data.location = VARYING_SLOT_LAYER;
nir_store_var(b, layer_id_out, view_index, 0x1);
nir_metadata_preserve(entrypoint, nir_metadata_block_index |
nir_metadata_dominance);
return true;
}
bool
anv_check_for_primitive_replication(struct anv_device *device,
VkShaderStageFlags stages,
nir_shader **shaders,
uint32_t view_mask)
{
assert(device->info->ver >= 12);
static int primitive_replication_max_views = -1;
if (primitive_replication_max_views < 0) {
/* TODO: Figure out why we are not getting the same benefits for more than
 * 2 views. For now, use Primitive Replication just for the 2-view case
* by default.
*/
const unsigned default_max_views = 2;
primitive_replication_max_views =
MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
default_max_views));
}
/* TODO: We should be able to support replication at 'geometry' stages
* later than Vertex. In that case only the last stage can refer to
* gl_ViewIndex.
*/
if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
return false;
int view_count = util_bitcount(view_mask);
if (view_count == 1 || view_count > primitive_replication_max_views)
return false;
return nir_can_lower_multiview(shaders[MESA_SHADER_VERTEX]);
}

View File

@ -0,0 +1,124 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir_builder.h"
static bool
lower_ubo_load_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
if (load->intrinsic != nir_intrinsic_load_global_constant_offset &&
load->intrinsic != nir_intrinsic_load_global_constant_bounded)
return false;
b->cursor = nir_before_instr(instr);
nir_ssa_def *base_addr = load->src[0].ssa;
nir_ssa_def *bound = NULL;
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded)
bound = load->src[2].ssa;
unsigned bit_size = load->dest.ssa.bit_size;
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
nir_ssa_def *val;
if (nir_src_is_const(load->src[1])) {
uint32_t offset = nir_src_as_uint(load->src[1]);
/* Things should be component-aligned. */
assert(offset % byte_size == 0);
assert(ANV_UBO_ALIGNMENT == 64);
unsigned suboffset = offset % 64;
uint64_t aligned_offset = offset - suboffset;
/* Load two just in case we go over a 64B boundary */
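/* (e.g. a 16-byte load at offset 56 needs bytes 56..71 and thus both blocks) */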
nir_ssa_def *data[2];
for (unsigned i = 0; i < 2; i++) {
nir_ssa_def *pred;
if (bound) {
pred = nir_ilt(b, nir_imm_int(b, aligned_offset + i * 64 + 63),
bound);
} else {
pred = nir_imm_true(b);
}
nir_ssa_def *addr = nir_iadd_imm(b, base_addr,
aligned_offset + i * 64);
data[i] = nir_load_global_const_block_intel(b, 16, addr, pred);
}
val = nir_extract_bits(b, data, 2, suboffset * 8,
load->num_components, bit_size);
} else {
nir_ssa_def *offset = load->src[1].ssa;
nir_ssa_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset));
if (bound) {
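/* Predicate the load and produce zero for out-of-bounds offsets, giving
 * robustBufferAccess-style behaviour.
 */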
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
unsigned load_size = byte_size * load->num_components;
nir_ssa_def *in_bounds =
nir_ilt(b, nir_iadd_imm(b, offset, load_size - 1), bound);
nir_push_if(b, in_bounds);
nir_ssa_def *load_val =
nir_build_load_global_constant(b, load->dest.ssa.num_components,
load->dest.ssa.bit_size, addr,
.access = nir_intrinsic_access(load),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
nir_pop_if(b, NULL);
val = nir_if_phi(b, load_val, zero);
} else {
val = nir_build_load_global_constant(b, load->dest.ssa.num_components,
load->dest.ssa.bit_size, addr,
.access = nir_intrinsic_access(load),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
}
}
nir_ssa_def_rewrite_uses(&load->dest.ssa, val);
nir_instr_remove(&load->instr);
return true;
}
bool
anv_nir_lower_ubo_loads(nir_shader *shader)
{
return nir_shader_instructions_pass(shader, lower_ubo_load_instr,
nir_metadata_none,
NULL);
}

View File

@ -0,0 +1,349 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "anv_private.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_vulkan.h"
struct ycbcr_state {
nir_builder *builder;
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
nir_deref_instr *tex_deref;
struct anv_ycbcr_conversion *conversion;
};
/* TODO: we should probably replace this with a push constant/uniform. */
static nir_ssa_def *
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
{
if (state->image_size)
return state->image_size;
nir_builder *b = state->builder;
const struct glsl_type *type = texture->type;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
tex->op = nir_texop_txs;
tex->sampler_dim = glsl_get_sampler_dim(type);
tex->is_array = glsl_sampler_type_is_array(type);
tex->is_shadow = glsl_sampler_type_is_shadow(type);
tex->dest_type = nir_type_int32;
tex->src[0].src_type = nir_tex_src_texture_deref;
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
nir_ssa_dest_init(&tex->instr, &tex->dest,
nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
state->image_size = nir_i2f32(b, &tex->dest.ssa);
return state->image_size;
}
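/* For cosited-even chroma, nudge the normalized coordinate by a fraction of
* a texel so that sampling the subsampled plane reconstructs the chroma
* value at the expected location.
*/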
static nir_ssa_def *
implicit_downsampled_coord(nir_builder *b,
nir_ssa_def *value,
nir_ssa_def *max_value,
int div_scale)
{
return nir_fadd(b,
value,
nir_fdiv(b,
nir_imm_float(b, 1.0f),
nir_fmul(b,
nir_imm_float(b, div_scale),
max_value)));
}
static nir_ssa_def *
implicit_downsampled_coords(struct ycbcr_state *state,
nir_ssa_def *old_coords,
const struct anv_format_plane *plane_format)
{
nir_builder *b = state->builder;
struct anv_ycbcr_conversion *conversion = state->conversion;
nir_ssa_def *image_size = get_texture_size(state, state->tex_deref);
nir_ssa_def *comp[4] = { NULL, };
int c;
for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) {
if (plane_format->denominator_scales[c] > 1 &&
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
comp[c] = implicit_downsampled_coord(b,
nir_channel(b, old_coords, c),
nir_channel(b, image_size, c),
plane_format->denominator_scales[c]);
} else {
comp[c] = nir_channel(b, old_coords, c);
}
}
/* Leave other coordinates untouched */
for (; c < old_coords->num_components; c++)
comp[c] = nir_channel(b, old_coords, c);
return nir_vec(b, comp, old_coords->num_components);
}
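/* Emit a copy of the original texture instruction that samples a single
* plane: the sources are duplicated, a nir_tex_src_plane source selects the
* plane, and coordinates are adjusted for subsampled chroma planes when
* needed.
*/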
static nir_ssa_def *
create_plane_tex_instr_implicit(struct ycbcr_state *state,
uint32_t plane)
{
nir_builder *b = state->builder;
struct anv_ycbcr_conversion *conversion = state->conversion;
const struct anv_format_plane *plane_format =
&conversion->format->planes[plane];
nir_tex_instr *old_tex = state->origin_tex;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
tex->src[i].src_type = old_tex->src[i].src_type;
switch (old_tex->src[i].src_type) {
case nir_tex_src_coord:
if (plane_format->has_chroma && conversion->chroma_reconstruction) {
assert(old_tex->src[i].src.is_ssa);
tex->src[i].src =
nir_src_for_ssa(implicit_downsampled_coords(state,
old_tex->src[i].src.ssa,
plane_format));
break;
}
FALLTHROUGH;
default:
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, &tex->instr);
break;
}
}
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
tex->sampler_dim = old_tex->sampler_dim;
tex->dest_type = old_tex->dest_type;
tex->op = old_tex->op;
tex->coord_components = old_tex->coord_components;
tex->is_new_style_shadow = old_tex->is_new_style_shadow;
tex->component = old_tex->component;
tex->texture_index = old_tex->texture_index;
tex->sampler_index = old_tex->sampler_index;
tex->is_array = old_tex->is_array;
nir_ssa_dest_init(&tex->instr, &tex->dest,
old_tex->dest.ssa.num_components,
nir_dest_bit_size(old_tex->dest), NULL);
nir_builder_instr_insert(b, &tex->instr);
return &tex->dest.ssa;
}
static unsigned
channel_to_component(enum isl_channel_select channel)
{
switch (channel) {
case ISL_CHANNEL_SELECT_RED:
return 0;
case ISL_CHANNEL_SELECT_GREEN:
return 1;
case ISL_CHANNEL_SELECT_BLUE:
return 2;
case ISL_CHANNEL_SELECT_ALPHA:
return 3;
default:
unreachable("invalid channel");
return 0;
}
}
static enum isl_channel_select
swizzle_channel(struct isl_swizzle swizzle, unsigned channel)
{
switch (channel) {
case 0:
return swizzle.r;
case 1:
return swizzle.g;
case 2:
return swizzle.b;
case 3:
return swizzle.a;
default:
unreachable("invalid channel");
return 0;
}
}
static bool
anv_nir_lower_ycbcr_textures_instr(nir_builder *builder,
nir_instr *instr,
void *cb_data)
{
const struct anv_pipeline_layout *layout = cb_data;
if (instr->type != nir_instr_type_tex)
return false;
nir_tex_instr *tex = nir_instr_as_tex(instr);
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
nir_variable *var = nir_deref_instr_get_variable(deref);
const struct anv_descriptor_set_layout *set_layout =
layout->set[var->data.descriptor_set].layout;
const struct anv_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
/* For the following instructions, we don't apply any change; the
* instruction simply applies to the first plane.
*/
if (tex->op == nir_texop_txs ||
tex->op == nir_texop_query_levels ||
tex->op == nir_texop_lod)
return false;
if (binding->immutable_samplers == NULL)
return false;
assert(tex->texture_index == 0);
unsigned array_index = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
if (!nir_src_is_const(deref->arr.index))
return false;
array_index = nir_src_as_uint(deref->arr.index);
array_index = MIN2(array_index, binding->array_size - 1);
}
const struct anv_sampler *sampler = binding->immutable_samplers[array_index];
if (sampler->conversion == NULL)
return false;
struct ycbcr_state state = {
.builder = builder,
.origin_tex = tex,
.tex_deref = deref,
.conversion = sampler->conversion,
};
builder->cursor = nir_before_instr(&tex->instr);
const struct anv_format *format = state.conversion->format;
const struct isl_format_layout *y_isl_layout = NULL;
for (uint32_t p = 0; p < format->n_planes; p++) {
if (!format->planes[p].has_chroma)
y_isl_layout = isl_format_get_layout(format->planes[p].isl_format);
}
assert(y_isl_layout != NULL);
uint8_t y_bpc = y_isl_layout->channels_array[0].bits;
/* |ycbcr_comp| holds components in the order: Cr-Y-Cb */
nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
nir_ssa_def *one = nir_imm_float(builder, 1.0f);
/* Use 2 extra channels for the following swizzle */
nir_ssa_def *ycbcr_comp[5] = { zero, zero, zero, one, zero };
uint8_t ycbcr_bpcs[5];
memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs));
/* Go through all the planes and gather the samples into a |ycbcr_comp|
* while applying a swizzle required by the spec:
*
* R, G, B should respectively map to Cr, Y, Cb
*/
for (uint32_t p = 0; p < format->n_planes; p++) {
const struct anv_format_plane *plane_format = &format->planes[p];
nir_ssa_def *plane_sample = create_plane_tex_instr_implicit(&state, p);
for (uint32_t pc = 0; pc < 4; pc++) {
enum isl_channel_select ycbcr_swizzle =
swizzle_channel(plane_format->ycbcr_swizzle, pc);
if (ycbcr_swizzle == ISL_CHANNEL_SELECT_ZERO)
continue;
unsigned ycbcr_component = channel_to_component(ycbcr_swizzle);
ycbcr_comp[ycbcr_component] = nir_channel(builder, plane_sample, pc);
/* Also compute the number of bits for each component. */
const struct isl_format_layout *isl_layout =
isl_format_get_layout(plane_format->isl_format);
ycbcr_bpcs[ycbcr_component] = isl_layout->channels_array[pc].bits;
}
}
/* Now remap components to the order specified by the conversion. */
nir_ssa_def *swizzled_comp[4] = { NULL, };
uint32_t swizzled_bpcs[4] = { 0, };
for (uint32_t i = 0; i < ARRAY_SIZE(state.conversion->mapping); i++) {
/* Maps to components in |ycbcr_comp| */
static const uint32_t swizzle_mapping[] = {
[VK_COMPONENT_SWIZZLE_ZERO] = 4,
[VK_COMPONENT_SWIZZLE_ONE] = 3,
[VK_COMPONENT_SWIZZLE_R] = 0,
[VK_COMPONENT_SWIZZLE_G] = 1,
[VK_COMPONENT_SWIZZLE_B] = 2,
[VK_COMPONENT_SWIZZLE_A] = 3,
};
const VkComponentSwizzle m = state.conversion->mapping[i];
if (m == VK_COMPONENT_SWIZZLE_IDENTITY) {
swizzled_comp[i] = ycbcr_comp[i];
swizzled_bpcs[i] = ycbcr_bpcs[i];
} else {
swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]];
swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]];
}
}
nir_ssa_def *result = nir_vec(builder, swizzled_comp, 4);
if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
result = nir_convert_ycbcr_to_rgb(builder,
state.conversion->ycbcr_model,
state.conversion->ycbcr_range,
result,
swizzled_bpcs);
}
nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
nir_instr_remove(&tex->instr);
return true;
}
bool
anv_nir_lower_ycbcr_textures(nir_shader *shader,
const struct anv_pipeline_layout *layout)
{
return nir_shader_instructions_pass(shader,
anv_nir_lower_ycbcr_textures_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)layout);
}

View File

@ -0,0 +1,488 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "anv_private.h"
#include "vk_util.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"
#include "util/mesa-sha1.h"
void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
const struct intel_device_info *devinfo = &device->info;
device->perf = NULL;
/* We need self-modifying batches. The i915 command parser prevents that on
* Gfx7.5 :( maybe one day.
*/
if (devinfo->ver < 8)
return;
struct intel_perf_config *perf = intel_perf_new(NULL);
intel_perf_init_metrics(perf, &device->info, fd,
false /* pipeline statistics */,
true /* register snapshots */);
if (!perf->n_queries)
goto err;
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
* perf revision 2.
*/
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (!intel_perf_has_hold_preemption(perf))
goto err;
}
device->perf = perf;
/* Compute the number of commands we need to implement a performance
* query.
*/
const struct intel_perf_query_field_layout *layout = &perf->query_layout;
device->n_perf_query_commands = 0;
for (uint32_t f = 0; f < layout->n_fields; f++) {
struct intel_perf_query_field *field = &layout->fields[f];
switch (field->type) {
case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
device->n_perf_query_commands++;
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
device->n_perf_query_commands += field->size / 4;
break;
default:
unreachable("Unhandled register type");
}
}
device->n_perf_query_commands *= 2; /* Begin & End */
device->n_perf_query_commands += 1; /* availability */
return;
err:
ralloc_free(perf);
}
void
anv_device_perf_init(struct anv_device *device)
{
device->perf_fd = -1;
}
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
struct drm_i915_perf_open_param param;
int p = 0, stream_fd;
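/* Properties are (key, value) pairs, hence num_properties = p / 2 below. */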
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
properties[p++] = true;
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = device->info->ver >= 8 ?
I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
I915_OA_FORMAT_A45_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
properties[p++] = device->context_id;
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
properties[p++] = true;
/* If global SSEU is available, pin it to the default. This ensures that on
* Gfx11, for instance, we use the full EU array; initially, when perf was
* enabled, we would use only half of it because of functional requirements.
*
* Temporarily disable this option on Gfx12.5+, the kernel doesn't appear to
* support it.
*/
if (intel_perf_has_global_sseu(device->physical->perf) &&
device->info->verx10 < 125) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
}
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
param.properties_ptr = (uintptr_t)properties;
param.num_properties = p / 2;
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
return stream_fd;
}
/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
VkDevice _device,
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->perf)
return VK_ERROR_EXTENSION_NOT_PRESENT;
/* Not much to do here */
return VK_SUCCESS;
}
VkResult anv_GetPerformanceParameterINTEL(
VkDevice _device,
VkPerformanceParameterTypeINTEL parameter,
VkPerformanceValueINTEL* pValue)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->perf)
return VK_ERROR_EXTENSION_NOT_PRESENT;
VkResult result = VK_SUCCESS;
switch (parameter) {
case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
pValue->data.valueBool = VK_TRUE;
break;
case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
pValue->data.value32 = 25;
break;
default:
result = VK_ERROR_FEATURE_NOT_PRESENT;
break;
}
return result;
}
VkResult anv_CmdSetPerformanceMarkerINTEL(
VkCommandBuffer commandBuffer,
const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
return VK_SUCCESS;
}
VkResult anv_AcquirePerformanceConfigurationINTEL(
VkDevice _device,
const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
VkPerformanceConfigurationINTEL* pConfiguration)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_performance_configuration_intel *config;
config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
if (!config)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
config->register_config =
intel_perf_load_configuration(device->physical->perf, device->fd,
INTEL_PERF_QUERY_GUID_MDAPI);
if (!config->register_config) {
vk_object_free(&device->vk, NULL, config);
return VK_INCOMPLETE;
}
int ret =
intel_perf_store_configuration(device->physical->perf, device->fd,
config->register_config, NULL /* guid */);
if (ret < 0) {
ralloc_free(config->register_config);
vk_object_free(&device->vk, NULL, config);
return VK_INCOMPLETE;
}
config->config_id = ret;
}
*pConfiguration = anv_performance_configuration_intel_to_handle(config);
return VK_SUCCESS;
}
VkResult anv_ReleasePerformanceConfigurationINTEL(
VkDevice _device,
VkPerformanceConfigurationINTEL _configuration)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
ralloc_free(config->register_config);
vk_object_free(&device->vk, NULL, config);
return VK_SUCCESS;
}
VkResult anv_QueueSetPerformanceConfigurationINTEL(
VkQueue _queue,
VkPerformanceConfigurationINTEL _configuration)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
struct anv_device *device = queue->device;
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (device->perf_fd < 0) {
device->perf_fd = anv_device_perf_open(device, config->config_id);
if (device->perf_fd < 0)
return VK_ERROR_INITIALIZATION_FAILED;
} else {
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) config->config_id);
if (ret < 0)
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
}
}
return VK_SUCCESS;
}
void anv_UninitializePerformanceApiINTEL(
VkDevice _device)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (device->perf_fd >= 0) {
close(device->perf_fd);
device->perf_fd = -1;
}
}
/* VK_KHR_performance_query */
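/* Translation tables from the intel_perf enums to their Vulkan equivalents;
* units without a direct Vulkan counterpart fall back to GENERIC.
*/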
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
[INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
[INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
[INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
[INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
[INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
[INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
[INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};
static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
[INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
[INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
uint32_t* pCounterCount,
VkPerformanceCounterKHR* pCounters,
VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
struct intel_perf_config *perf = pdevice->perf;
uint32_t desc_count = *pCounterCount;
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
pCounterDescriptions, &desc_count);
/* We cannot support performance queries on anything other than RCS,
* because the MI_REPORT_PERF_COUNT command is not available on other
* engines.
*/
struct anv_queue_family *queue_family =
&pdevice->queue.families[queueFamilyIndex];
if (queue_family->engine_class != I915_ENGINE_CLASS_RENDER)
return vk_outarray_status(&out);
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
unsigned char sha1_result[20];
_mesa_sha1_compute(intel_counter->symbol_name,
strlen(intel_counter->symbol_name),
sha1_result);
memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
}
vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
desc->flags = 0; /* None so far. */
snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
}
}
return vk_outarray_status(&out);
}
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
VkPhysicalDevice physicalDevice,
const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
uint32_t* pNumPasses)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
struct intel_perf_config *perf = pdevice->perf;
if (!perf) {
*pNumPasses = 0;
return;
}
*pNumPasses = intel_perf_get_n_passes(perf,
pPerformanceQueryCreateInfo->pCounterIndices,
pPerformanceQueryCreateInfo->counterIndexCount,
NULL);
}
VkResult anv_AcquireProfilingLockKHR(
VkDevice _device,
const VkAcquireProfilingLockInfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct intel_perf_config *perf = device->physical->perf;
struct intel_perf_query_info *first_metric_set = &perf->queries[0];
int fd = -1;
assert(device->perf_fd == -1);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
if (fd < 0)
return VK_TIMEOUT;
}
device->perf_fd = fd;
return VK_SUCCESS;
}
void anv_ReleaseProfilingLockKHR(
VkDevice _device)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
assert(device->perf_fd >= 0);
close(device->perf_fd);
}
device->perf_fd = -1;
}
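/* Convert the accumulated results of one query pass into the
* VkPerformanceCounterResultKHR layout expected by the application.
*/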
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
struct anv_query_pool *pool, uint32_t pass,
const struct intel_perf_query_result *accumulated_results,
union VkPerformanceCounterResultKHR *results)
{
for (uint32_t c = 0; c < pool->n_counters; c++) {
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
if (counter_pass->pass != pass)
continue;
switch (pool->pass_query[pass]->kind) {
case INTEL_PERF_QUERY_TYPE_PIPELINE: {
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
results[c].uint64 = accumulated_results->accumulator[accu_offset];
break;
}
case INTEL_PERF_QUERY_TYPE_OA:
case INTEL_PERF_QUERY_TYPE_RAW:
switch (counter_pass->counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
results[c].uint64 =
counter_pass->counter->oa_counter_read_uint64(perf,
counter_pass->query,
accumulated_results);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
results[c].float32 =
counter_pass->counter->oa_counter_read_float(perf,
counter_pass->query,
accumulated_results);
break;
default:
/* So far we aren't using uint32, double or bool32... */
unreachable("unexpected counter data type");
}
break;
default:
unreachable("invalid query type");
}
/* The Vulkan extension only has nanoseconds as a unit */
if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
results[c].uint64 *= 1000;
}
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,380 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/blob.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "nir/nir_serialize.h"
#include "anv_private.h"
#include "nir/nir_xfb_info.h"
#include "vulkan/util/vk_util.h"
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob);
static void
anv_shader_bin_destroy(struct vk_pipeline_cache_object *object)
{
struct anv_device *device =
container_of(object->device, struct anv_device, vk);
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_pipeline_cache_object_finish(&shader->base);
vk_free(&device->vk.alloc, shader);
}
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
.serialize = anv_shader_bin_serialize,
.deserialize = anv_shader_bin_deserialize,
.destroy = anv_shader_bin_destroy,
};
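/* NULL-terminated list of cache object ops, presumably handed to the common
* vk_pipeline_cache code so it can recognize this driver's objects when
* importing cache data.
*/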
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
&anv_shader_bin_ops,
NULL
};
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_in,
uint32_t prog_data_size,
const struct brw_compile_stats *stats, uint32_t num_stats,
const nir_xfb_info *xfb_info_in,
const struct anv_pipeline_bind_map *bind_map)
{
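/* The shader_bin and all of its trailing data (cache key, prog_data, relocs,
* xfb info, bind map tables) are carved out of a single allocation.
*/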
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
prog_data_size);
VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
prog_data_in->num_relocs);
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
xfb_info_in == NULL ? 0 :
nir_xfb_info_size(xfb_info_in->output_count));
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
bind_map->surface_count);
VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
bind_map->sampler_count);
if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&device->vk, &shader->base,
&anv_shader_bin_ops, obj_key_data, key_size);
shader->stage = stage;
shader->kernel =
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
memcpy(shader->kernel.map, kernel_data, kernel_size);
shader->kernel_size = kernel_size;
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
prog_data_in->const_data_offset;
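/* Now that the kernel's final location in the instruction state pool is
* known, patch the absolute addresses the compiler left as relocations.
*/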
int rv_count = 0;
struct brw_shader_reloc_value reloc_values[5];
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = shader_data_addr >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
.value = shader->kernel.offset,
};
if (brw_shader_stage_is_bindless(stage)) {
const struct brw_bs_prog_data *bs_prog_data =
brw_bs_prog_data_const(prog_data_in);
uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
bs_prog_data->resume_sbt_offset;
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
.value = resume_sbt_addr,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
.value = resume_sbt_addr >> 32,
};
}
brw_write_shader_relocs(&device->physical->compiler->isa,
shader->kernel.map, prog_data_in,
reloc_values, rv_count);
memcpy(prog_data, prog_data_in, prog_data_size);
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
prog_data_in->num_relocs);
prog_data->relocs = prog_data_relocs;
memset(prog_data_param, 0,
prog_data->nr_params * sizeof(*prog_data_param));
prog_data->param = prog_data_param;
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;
assert(num_stats <= ARRAY_SIZE(shader->stats));
typed_memcpy(shader->stats, stats, num_stats);
shader->num_stats = num_stats;
if (xfb_info_in) {
*xfb_info = *xfb_info_in;
typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
xfb_info_in->output_count);
shader->xfb_info = xfb_info;
} else {
shader->xfb_info = NULL;
}
shader->bind_map = *bind_map;
typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
bind_map->surface_count);
shader->bind_map.surface_to_descriptor = surface_to_descriptor;
typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
bind_map->sampler_count);
shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
return shader;
}
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
blob_write_uint32(blob, shader->stage);
blob_write_uint32(blob, shader->kernel_size);
blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
blob_write_uint32(blob, shader->prog_data_size);
blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
blob_write_bytes(blob, shader->prog_data->relocs,
shader->prog_data->num_relocs *
sizeof(shader->prog_data->relocs[0]));
blob_write_uint32(blob, shader->num_stats);
blob_write_bytes(blob, shader->stats,
shader->num_stats * sizeof(shader->stats[0]));
if (shader->xfb_info) {
uint32_t xfb_info_size =
nir_xfb_info_size(shader->xfb_info->output_count);
blob_write_uint32(blob, xfb_info_size);
blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
} else {
blob_write_uint32(blob, 0);
}
blob_write_bytes(blob, shader->bind_map.surface_sha1,
sizeof(shader->bind_map.surface_sha1));
blob_write_bytes(blob, shader->bind_map.sampler_sha1,
sizeof(shader->bind_map.sampler_sha1));
blob_write_bytes(blob, shader->bind_map.push_sha1,
sizeof(shader->bind_map.push_sha1));
blob_write_uint32(blob, shader->bind_map.surface_count);
blob_write_uint32(blob, shader->bind_map.sampler_count);
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
shader->bind_map.surface_count *
sizeof(*shader->bind_map.surface_to_descriptor));
blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
shader->bind_map.sampler_count *
sizeof(*shader->bind_map.sampler_to_descriptor));
blob_write_bytes(blob, shader->bind_map.push_ranges,
sizeof(shader->bind_map.push_ranges));
return !blob->out_of_memory;
}
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *vk_device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
struct anv_device *device =
container_of(vk_device, struct anv_device, vk);
gl_shader_stage stage = blob_read_uint32(blob);
uint32_t kernel_size = blob_read_uint32(blob);
const void *kernel_data = blob_read_bytes(blob, kernel_size);
uint32_t prog_data_size = blob_read_uint32(blob);
const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
if (blob->overrun)
return NULL;
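/* prog_data_size depends on the shader stage and may be smaller than the
* union, so only copy what was actually serialized.
*/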
union brw_any_prog_data prog_data;
memcpy(&prog_data, prog_data_bytes,
MIN2(sizeof(prog_data), prog_data_size));
prog_data.base.relocs =
blob_read_bytes(blob, prog_data.base.num_relocs *
sizeof(prog_data.base.relocs[0]));
uint32_t num_stats = blob_read_uint32(blob);
const struct brw_compile_stats *stats =
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
const nir_xfb_info *xfb_info = NULL;
uint32_t xfb_size = blob_read_uint32(blob);
if (xfb_size)
xfb_info = blob_read_bytes(blob, xfb_size);
struct anv_pipeline_bind_map bind_map;
blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
bind_map.surface_count = blob_read_uint32(blob);
bind_map.sampler_count = blob_read_uint32(blob);
bind_map.surface_to_descriptor = (void *)
blob_read_bytes(blob, bind_map.surface_count *
sizeof(*bind_map.surface_to_descriptor));
bind_map.sampler_to_descriptor = (void *)
blob_read_bytes(blob, bind_map.sampler_count *
sizeof(*bind_map.sampler_to_descriptor));
blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));
if (blob->overrun)
return NULL;
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
&prog_data.base, prog_data_size,
stats, num_stats, xfb_info, &bind_map);
if (shader == NULL)
return NULL;
return &shader->base;
}
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
struct vk_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
bool *user_cache_hit)
{
/* Use the default pipeline cache if none is specified */
if (cache == NULL)
cache = device->default_pipeline_cache;
bool cache_hit = false;
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&anv_shader_bin_ops, &cache_hit);
if (user_cache_hit != NULL) {
*user_cache_hit = object != NULL && cache_hit &&
cache != device->default_pipeline_cache;
}
if (object == NULL)
return NULL;
return container_of(object, struct anv_shader_bin, base);
}
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct brw_compile_stats *stats,
uint32_t num_stats,
const nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map)
{
/* Use the default pipeline cache if none is specified */
if (cache == NULL)
cache = device->default_pipeline_cache;
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
stats, num_stats,
xfb_info, bind_map);
if (shader == NULL)
return NULL;
struct vk_pipeline_cache_object *cached =
vk_pipeline_cache_add_object(cache, &shader->base);
return container_of(cached, struct anv_shader_bin, base);
}
#define SHA1_KEY_SIZE 20
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
struct vk_pipeline_cache *cache,
const nir_shader_compiler_options *nir_options,
unsigned char sha1_key[SHA1_KEY_SIZE],
void *mem_ctx)
{
if (cache == NULL)
cache = device->default_pipeline_cache;
return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
nir_options, NULL, mem_ctx);
}
void
anv_device_upload_nir(struct anv_device *device,
struct vk_pipeline_cache *cache,
const struct nir_shader *nir,
unsigned char sha1_key[SHA1_KEY_SIZE])
{
if (cache == NULL)
cache = device->default_pipeline_cache;
vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,75 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* This file implements VkQueue
*/
#include "anv_private.h"
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue,
uint32_t exec_flags,
const VkDeviceQueueCreateInfo *pCreateInfo,
uint32_t index_in_family)
{
struct anv_physical_device *pdevice = device->physical;
VkResult result;
result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo,
index_in_family);
if (result != VK_SUCCESS)
return result;
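/* With INTEL_DEBUG=sync, create a per-queue sync object; the submission code
* presumably waits on it after each submit so submissions become synchronous.
*/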
if (INTEL_DEBUG(DEBUG_SYNC)) {
result = vk_sync_create(&device->vk,
&device->physical->sync_syncobj_type,
0, 0, &queue->sync);
if (result != VK_SUCCESS) {
vk_queue_finish(&queue->vk);
return result;
}
}
queue->vk.driver_submit = anv_queue_submit;
queue->device = device;
assert(queue->vk.queue_family_index < pdevice->queue.family_count);
queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
queue->index_in_family = index_in_family;
queue->exec_flags = exec_flags;
return VK_SUCCESS;
}
void
anv_queue_finish(struct anv_queue *queue)
{
if (queue->sync)
vk_sync_destroy(&queue->device->vk, queue->sync);
vk_queue_finish(&queue->vk);
}

View File

@ -0,0 +1,92 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include "anv_private.h"
#include "vk_enum_to_str.h"
void
__anv_perf_warn(struct anv_device *device,
const struct vk_object_base *object,
const char *file, int line, const char *format, ...)
{
va_list ap;
char buffer[256];
va_start(ap, format);
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
if (object) {
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
VK_LOG_OBJS(object), file, line,
"PERF: %s", buffer);
} else {
__vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
VK_LOG_NO_OBJS(device->physical->instance), file, line,
"PERF: %s", buffer);
}
}
void
anv_dump_pipe_bits(enum anv_pipe_bits bits)
{
if (bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT)
fputs("+depth_flush ", stderr);
if (bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT)
fputs("+dc_flush ", stderr);
if (bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)
fputs("+hdc_flush ", stderr);
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
fputs("+rt_flush ", stderr);
if (bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
fputs("+tile_flush ", stderr);
if (bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT)
fputs("+state_inval ", stderr);
if (bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT)
fputs("+const_inval ", stderr);
if (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
fputs("+vf_inval ", stderr);
if (bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT)
fputs("+tex_inval ", stderr);
if (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
fputs("+ic_inval ", stderr);
if (bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT)
fputs("+pb_stall ", stderr);
if (bits & ANV_PIPE_PSS_STALL_SYNC_BIT)
fputs("+pss_stall ", stderr);
if (bits & ANV_PIPE_DEPTH_STALL_BIT)
fputs("+depth_stall ", stderr);
if (bits & ANV_PIPE_CS_STALL_BIT)
fputs("+cs_stall ", stderr);
if (bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)
fputs("+utdp_flush", stderr);
}

View File

@ -0,0 +1,346 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "perf/intel_perf.h"
static uint32_t
command_buffers_count_utraces(struct anv_device *device,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t *utrace_copies)
{
if (!u_trace_context_actively_tracing(&device->ds.trace_context))
return 0;
uint32_t utraces = 0;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (u_trace_has_points(&cmd_buffers[i]->trace)) {
utraces++;
if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
*utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
}
}
return utraces;
}
static void
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_flush_copy *flush = flush_data;
intel_ds_flush_data_fini(&flush->ds);
if (flush->trace_bo) {
assert(flush->batch_bo);
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
anv_device_release_bo(device, flush->batch_bo);
anv_device_release_bo(device, flush->trace_bo);
}
vk_sync_destroy(&device->vk, flush->sync);
vk_free(&device->vk.alloc, flush);
}
static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_flush_copy *flush = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
to_addr, from_addr, count * sizeof(uint64_t));
}
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
struct anv_utrace_flush_copy **out_flush_data)
{
struct anv_device *device = queue->device;
uint32_t utrace_copies = 0;
uint32_t utraces = command_buffers_count_utraces(device,
cmd_buffer_count,
cmd_buffers,
&utrace_copies);
if (!utraces) {
*out_flush_data = NULL;
return VK_SUCCESS;
}
VkResult result;
struct anv_utrace_flush_copy *flush =
vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!flush)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &flush->sync);
if (result != VK_SUCCESS)
goto error_sync;
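/* Command buffers that are not ONE_TIME_SUBMIT may be reused or reset before
* the trace is processed, so their timestamps get copied into a dedicated
* buffer by the small SO-memcpy batch emitted below.
*/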
if (utrace_copies > 0) {
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
utrace_copies * 4096,
&flush->trace_bo);
if (result != VK_SUCCESS)
goto error_trace_buf;
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
/* 128 dwords of setup + 64 dwords per copy */
align_u32(512 + 64 * utrace_copies, 4096),
&flush->batch_bo);
if (result != VK_SUCCESS)
goto error_batch_buf;
result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
if (result != VK_SUCCESS)
goto error_reloc_list;
flush->batch.alloc = &device->vk.alloc;
flush->batch.relocs = &flush->relocs;
anv_batch_set_storage(&flush->batch,
(struct anv_address) { .bo = flush->batch_bo, },
flush->batch_bo->map, flush->batch_bo->size);
/* Emit the copies */
anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state,
device,
&flush->batch);
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
u_trace_flush(&cmd_buffers[i]->trace, flush, false);
} else {
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&flush->ds.trace,
flush,
anv_device_utrace_emit_copy_ts_buffer);
}
}
anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
u_trace_flush(&flush->ds.trace, flush, true);
if (flush->batch.status != VK_SUCCESS) {
result = flush->batch.status;
goto error_batch;
}
} else {
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
}
}
flush->queue = queue;
*out_flush_data = flush;
return VK_SUCCESS;
error_batch:
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
error_reloc_list:
anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
error_batch_buf:
anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
error_trace_buf:
vk_sync_destroy(&device->vk, flush->sync);
error_sync:
vk_free(&device->vk.alloc, flush);
return result;
}
static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = NULL;
UNUSED VkResult result =
anv_bo_pool_alloc(&device->utrace_bo_pool,
align_u32(size_b, 4096),
&bo);
assert(result == VK_SUCCESS);
return bo;
}
static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = timestamps;
anv_bo_pool_free(&device->utrace_bo_pool, bo);
}
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(ut, struct anv_cmd_buffer, trace);
struct anv_device *device = cmd_buffer->device;
struct anv_bo *bo = timestamps;
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
(struct anv_address) {
.bo = bo,
.offset = idx * sizeof(uint64_t) },
end_of_pipe);
}
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_bo *bo = timestamps;
struct anv_utrace_flush_copy *flush = flush_data;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
UNUSED VkResult result =
vk_sync_wait(&device->vk,
flush->sync,
0,
VK_SYNC_WAIT_COMPLETE,
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
assert(result == VK_SUCCESS);
}
uint64_t *ts = bo->map;
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
return intel_device_info_timebase_scale(device->info, ts[idx]);
}
static const char *
queue_family_to_name(const struct anv_queue_family *family)
{
switch (family->engine_class) {
case I915_ENGINE_CLASS_RENDER:
return "render";
case I915_ENGINE_CLASS_COPY:
return "copy";
case I915_ENGINE_CLASS_VIDEO:
return "video";
case I915_ENGINE_CLASS_VIDEO_ENHANCE:
return "video-enh";
default:
return "unknown";
}
}
void
anv_device_utrace_init(struct anv_device *device)
{
anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
intel_ds_device_init(&device->ds, device->info, device->fd,
device->physical->local_minor - 128,
INTEL_DS_API_VULKAN);
u_trace_context_init(&device->ds.trace_context,
&device->ds,
anv_utrace_create_ts_buffer,
anv_utrace_destroy_ts_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
anv_utrace_delete_flush_data);
for (uint32_t q = 0; q < device->queue_count; q++) {
struct anv_queue *queue = &device->queues[q];
queue->ds =
intel_ds_device_add_queue(&device->ds, "%s%u",
queue_family_to_name(queue->family),
queue->index_in_family);
}
}
void
anv_device_utrace_finish(struct anv_device *device)
{
u_trace_context_process(&device->ds.trace_context, true);
intel_ds_device_fini(&device->ds);
anv_bo_pool_finish(&device->utrace_bo_pool);
}
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
{
static const struct {
enum anv_pipe_bits anv;
enum intel_ds_stall_flag ds;
} anv_to_ds_flags[] = {
{ .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
{ .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
{ .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
{ .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
{ .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
};
enum intel_ds_stall_flag ret = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
if (anv_to_ds_flags[i].anv & bits)
ret |= anv_to_ds_flags[i].ds;
}
return ret;
}

View File

@ -0,0 +1,118 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "anv_measure.h"
#include "wsi_common.h"
#include "vk_fence.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_util.h"
static PFN_vkVoidFunction
anv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
}
VkResult
anv_init_wsi(struct anv_physical_device *physical_device)
{
VkResult result;
result = wsi_device_init(&physical_device->wsi_device,
anv_physical_device_to_handle(physical_device),
anv_wsi_proc_addr,
&physical_device->instance->vk.alloc,
physical_device->master_fd,
&physical_device->instance->dri_options,
false);
if (result != VK_SUCCESS)
return result;
physical_device->wsi_device.supports_modifiers = true;
physical_device->wsi_device.signal_semaphore_with_memory = true;
physical_device->wsi_device.signal_fence_with_memory = true;
physical_device->vk.wsi_device = &physical_device->wsi_device;
wsi_device_setup_syncobj_fd(&physical_device->wsi_device,
physical_device->local_fd);
return VK_SUCCESS;
}
void
anv_finish_wsi(struct anv_physical_device *physical_device)
{
physical_device->vk.wsi_device = NULL;
wsi_device_finish(&physical_device->wsi_device,
&physical_device->instance->vk.alloc);
}
VkResult anv_AcquireNextImage2KHR(
VkDevice _device,
const VkAcquireNextImageInfoKHR *pAcquireInfo,
uint32_t *pImageIndex)
{
VK_FROM_HANDLE(anv_device, device, _device);
VkResult result =
wsi_common_acquire_next_image2(&device->physical->wsi_device,
_device, pAcquireInfo, pImageIndex);
if (result == VK_SUCCESS)
anv_measure_acquire(device);
return result;
}
VkResult anv_QueuePresentKHR(
VkQueue _queue,
const VkPresentInfoKHR* pPresentInfo)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
struct anv_device *device = queue->device;
VkResult result;
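/* Bump the per-frame debug counter and flush the cacheline on platforms that
* need explicit flushes so the updated value is visible in memory.
*/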
if (device->debug_frame_desc) {
device->debug_frame_desc->frame_id++;
if (device->physical->memory.need_clflush) {
intel_clflush_range(device->debug_frame_desc,
sizeof(*device->debug_frame_desc));
}
}
result = vk_queue_wait_before_present(&queue->vk, pPresentInfo);
if (result != VK_SUCCESS)
return result;
result = wsi_common_queue_present(&device->physical->wsi_device,
anv_device_to_handle(queue->device),
_queue, 0,
pPresentInfo);
u_trace_context_process(&device->ds.trace_context, true);
return result;
}

View File

@ -0,0 +1,410 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include "anv_private.h"
#include "anv_measure.h"
/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address
#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"
#include "ds/intel_tracepoints.h"
static void blorp_measure_start(struct blorp_batch *_batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
trace_intel_begin_blorp(&cmd_buffer->trace);
anv_measure_snapshot(cmd_buffer,
params->snapshot_type,
NULL, 0);
}
static void blorp_measure_end(struct blorp_batch *_batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
trace_intel_end_blorp(&cmd_buffer->trace,
params->x1 - params->x0,
params->y1 - params->y0,
params->hiz_op,
params->fast_clear_op,
params->shader_type,
params->shader_pipeline);
}
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
void *location, struct blorp_address address, uint32_t delta)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->batch.start <= location &&
location < cmd_buffer->batch.end);
return anv_batch_emit_reloc(&cmd_buffer->batch, location,
address.buffer, address.offset + delta);
}
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
VkResult result;
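   /* With softpin, surface addresses are final: we only need to track the BO
    * so it ends up in the execbuf. Otherwise, record a relocation and write
    * the presumed address into the surface state below.
    */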
if (ANV_ALWAYS_SOFTPIN) {
result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
&cmd_buffer->vk.pool->alloc,
address.buffer);
if (unlikely(result != VK_SUCCESS))
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
uint64_t address_u64 = 0;
result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
&cmd_buffer->vk.pool->alloc,
ss_offset, address.buffer,
address.offset + delta,
&address_u64);
if (result != VK_SUCCESS)
anv_batch_set_error(&cmd_buffer->batch, result);
void *dest = anv_block_pool_map(
&cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
write_reloc(cmd_buffer->device, dest, address_u64, false);
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address address)
{
if (ANV_ALWAYS_SOFTPIN) {
struct anv_address anv_addr = {
.bo = address.buffer,
.offset = address.offset,
};
return anv_address_physical(anv_addr);
} else {
/* We'll let blorp_surface_reloc write the address. */
return 0;
}
}
#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return (struct blorp_address) {
.buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
.offset = 0,
};
}
#endif
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
*offset = state.offset;
return state.map;
}
UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state state =
anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
alignment);
*offset = state.offset;
return state.map;
}
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
unsigned state_size, unsigned state_alignment,
uint32_t *bt_offset,
uint32_t *surface_offsets, void **surface_maps)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
uint32_t state_offset;
struct anv_state bt_state;
VkResult result =
anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
&state_offset, &bt_state);
if (result != VK_SUCCESS)
return;
uint32_t *bt_map = bt_state.map;
*bt_offset = bt_state.offset;
for (unsigned i = 0; i < num_entries; i++) {
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer);
bt_map[i] = surface_state.offset + state_offset;
surface_offsets[i] = surface_state.offset;
surface_maps[i] = surface_state.map;
}
}
static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
uint32_t offset)
{
return offset;
}
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
struct blorp_address *addr)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
struct anv_state vb_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);
*addr = (struct blorp_address) {
.buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = vb_state.offset,
.mocs = isl_mocs(&cmd_buffer->device->isl_dev,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
};
return vb_state.map;
}
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
const struct blorp_address *addrs,
uint32_t *sizes,
unsigned num_vbs)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
for (unsigned i = 0; i < num_vbs; i++) {
struct anv_address anv_addr = {
.bo = addrs[i].buffer,
.offset = addrs[i].offset,
};
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
i, anv_addr, sizes[i]);
}
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
/* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
* really matter for blorp because we never call apply_pipe_flushes after
* this point.
*/
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
(1 << num_vbs) - 1);
}
UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return (struct blorp_address) {
.buffer = cmd_buffer->device->workaround_address.bo,
.offset = cmd_buffer->device->workaround_address.offset,
};
}
static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
/* We don't need to flush states anymore, since everything will be snooped.
*/
}
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
return cmd_buffer->state.current_l3_config;
}
static void
blorp_exec_on_render(struct blorp_batch *batch,
const struct blorp_params *params)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);
const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
params->y1 - params->y0, scale);
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
* Target Cache Flush by enabling this bit. When render target flush
* is set due to new association of BTI, PS Scoreboard Stall bit must
* be set in this packet."
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"before blorp BTI change");
#endif
if (params->depth.enabled &&
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
genX(flush_pipeline_select_3d)(cmd_buffer);
   /* Apply any outstanding flushes in case the pipeline select above didn't. */
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);
/* BLORP doesn't do anything fancy with depth such as discards, so we want
* the PMA fix off. Also, off is always the safe option.
*/
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
blorp_exec(batch, params);
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
* Target Cache Flush by enabling this bit. When render target flush
* is set due to new association of BTI, PS Scoreboard Stall bit must
* be set in this packet."
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"after blorp BTI change");
#endif
/* Calculate state that does not get touched by blorp.
* Flush everything else.
*/
anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
BITSET_ONES(dyn_dirty);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
if (!params->wm_prog_data) {
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
}
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty;
BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}
static void
blorp_exec_on_compute(struct blorp_batch *batch,
const struct blorp_params *params)
{
assert(batch->flags & BLORP_BATCH_USE_COMPUTE);
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
   /* Apply any outstanding flushes in case the pipeline select above didn't. */
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
blorp_exec(batch, params);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
void
genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params)
{
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
if (!cmd_buffer->state.current_l3_config) {
const struct intel_l3_config *cfg =
intel_get_default_l3_config(cmd_buffer->device->info);
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
}
#if GFX_VER == 7
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
* indirect fast-clear colors can cause GPU hangs if we don't stall first.
* See genX(cmd_buffer_mi_memcpy) for more details.
*/
if (params->src.clear_color_addr.buffer ||
params->dst.clear_color_addr.buffer) {
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
"before blorp prep fast clear");
}
#endif
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
blorp_exec_on_compute(batch, params);
else
blorp_exec_on_render(batch, params);
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,324 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_l3_config.h"
/**
* This file implements some lightweight memcpy/memset operations on the GPU
* using a vertex buffer and streamout.
*/
/**
* Returns the greatest common divisor of a and b that is a power of two.
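 *
 * For example, gcd_pow2_u64(16, 24) returns 8: 16 = 2^4 and 24 = 2^3 * 3, so
 * the largest power of two dividing both is 2^3.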
*/
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
assert(a > 0 || b > 0);
unsigned a_log2 = ffsll(a) - 1;
unsigned b_log2 = ffsll(b) - 1;
/* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
* case, the MIN2() will take the other one. If both are 0 then we will
* hit the assert above.
*/
return 1 << MIN2(a_log2, b_log2);
}
static void
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
const struct intel_l3_config *l3_config)
{
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = false;
vfi.VertexElementIndex = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
#endif
/* Disable all shader stages */
   anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
   anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
   anv_batch_emit(batch, GENX(3DSTATE_TE), te);
   anv_batch_emit(batch, GENX(3DSTATE_DS), ds);
   anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
   anv_batch_emit(batch, GENX(3DSTATE_PS), ps);
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
sbe.VertexURBEntryReadOffset = 1;
sbe.NumberofSFOutputAttributes = 1;
sbe.VertexURBEntryReadLength = 1;
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadLength = true;
sbe.ForceVertexURBEntryReadOffset = true;
#endif
#if GFX_VER >= 9
for (unsigned i = 0; i < 32; i++)
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
}
/* Emit URB setup. We tell it that the VS is active because we want it to
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
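    *
    * The entry sizes below all work out to 1 (DIV_ROUND_UP(32, 64) == 1).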
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
#if GFX_VER >= 12
/* Disable Primitive Replication. */
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
}
#endif
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
vf.StatisticsEnable = false;
}
}
static void
emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
   /* The copy is done in blocks of at most four 32-bit components (16 bytes)
    * at a time.
    */
assert(size % 4 == 0);
unsigned bs = gcd_pow2_u64(16, size);
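   /* For example, a 24-byte copy yields bs == 8 (R32G32_UINT, 3 vertices),
    * while any 16-byte-aligned size uses the full R32G32B32A32_UINT block.
    */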
enum isl_format format;
switch (bs) {
case 4: format = ISL_FORMAT_R32_UINT; break;
case 8: format = ISL_FORMAT_R32G32_UINT; break;
case 16: format = ISL_FORMAT_R32G32B32A32_UINT; break;
default:
unreachable("Invalid size");
}
uint32_t *dw;
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_VERTEX_BUFFERS));
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1,
&(struct GENX(VERTEX_BUFFER_STATE)) {
         .VertexBufferIndex = 32, /* Index 32 is reserved for this internal copy path */
.AddressModifyEnable = true,
.BufferStartingAddress = src,
.BufferPitch = bs,
.MOCS = anv_mocs(device, src.bo, 0),
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
#if (GFX_VER >= 8)
.BufferSize = size,
#else
.EndAddress = anv_address_add(src, size - 1),
#endif
});
dw = anv_batch_emitn(batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw + 1,
&(struct GENX(VERTEX_ELEMENT_STATE)) {
.VertexBufferIndex = 32,
.Valid = true,
.SourceElementFormat = format,
.SourceElementOffset = 0,
.Component0Control = (bs >= 4) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component1Control = (bs >= 8) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component2Control = (bs >= 12) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
.Component3Control = (bs >= 16) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
});
anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) {
#if GFX_VER < 12
sob.SOBufferIndex = 0;
#else
sob._3DCommandOpcode = 0;
sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD;
#endif
      sob.MOCS = anv_mocs(device, dst.bo, 0);
sob.SurfaceBaseAddress = dst;
#if GFX_VER >= 8
sob.SOBufferEnable = true;
sob.SurfaceSize = size / 4 - 1;
#else
sob.SurfacePitch = bs;
sob.SurfaceEndAddress = anv_address_add(dst, size);
#endif
#if GFX_VER >= 8
/* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
* the end position of the stream. We need to reset this value to 0 at
* the beginning of the run or else SOL will start at the offset from
* the previous draw.
*/
sob.StreamOffsetWriteEnable = true;
sob.StreamOffset = 0;
#endif
}
#if GFX_VER <= 7
/* The hardware can do this for us on BDW+ (see above) */
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), load) {
load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
load.DataDWord = 0;
}
#endif
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
.StreamtoBufferSelects0 = (1 << 0),
.NumEntries0 = 1);
GENX(SO_DECL_ENTRY_pack)(batch, dw + 3,
&(struct GENX(SO_DECL_ENTRY)) {
.Stream0Decl = {
.OutputBufferSlot = 0,
.RegisterIndex = 0,
.ComponentMask = (1 << (bs / 4)) - 1,
},
});
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) {
so.SOFunctionEnable = true;
so.RenderingDisable = true;
so.Stream0VertexReadOffset = 0;
so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
#if GFX_VER >= 8
so.Buffer0SurfacePitch = bs;
#else
so.SOBufferEnable0 = true;
#endif
}
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
prim.VertexCountPerInstance = size / bs;
prim.StartVertexLocation = 0;
prim.InstanceCount = 1;
prim.StartInstanceLocation = 0;
prim.BaseVertexLocation = 0;
}
}
void
genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_device *device,
struct anv_batch *batch)
{
memset(state, 0, sizeof(*state));
state->batch = batch;
state->device = device;
const struct intel_l3_config *cfg = intel_get_default_l3_config(device->info);
genX(emit_l3_config)(batch, device, cfg);
anv_batch_emit(batch, GENX(PIPELINE_SELECT), ps) {
#if GFX_VER >= 9
ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
#endif
ps.PipelineSelection = _3D;
}
emit_common_so_memcpy(batch, device, cfg);
}
void
genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
{
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
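   /* Pad with a NOOP if the batch length is not a multiple of 8 bytes. */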
if ((state->batch->next - state->batch->start) & 4)
anv_batch_emit(state->batch, GENX(MI_NOOP), noop);
}
void
genX(emit_so_memcpy)(struct anv_memcpy_state *state,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
if (GFX_VER >= 8 && GFX_VER <= 9 &&
!anv_use_relocations(state->device->physical) &&
anv_gfx8_9_vb_cache_range_needs_workaround(&state->vb_bound,
&state->vb_dirty,
src, size)) {
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_VF_CACHE_INVALIDATE_BIT);
memset(&state->vb_dirty, 0, sizeof(state->vb_dirty));
}
emit_so_memcpy(state->batch, state->device, dst, src, size);
}
void
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
if (size == 0)
return;
if (!cmd_buffer->state.current_l3_config) {
const struct intel_l3_config *cfg =
intel_get_default_l3_config(cmd_buffer->device->info);
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
}
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 32, src, size);
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genX(flush_pipeline_select_3d)(cmd_buffer);
emit_common_so_memcpy(&cmd_buffer->batch, cmd_buffer->device,
cmd_buffer->state.current_l3_config);
emit_so_memcpy(&cmd_buffer->batch, cmd_buffer->device, dst, src, size);
genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
1ull << 32);
   /* Mark the pipeline and rasterizer-discard state dirty since we touched
    * 3DSTATE_STREAMOUT.
    */
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
BITSET_SET(cmd_buffer->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,314 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "vk_format.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
static uint32_t
get_depth_format(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
switch (gfx->depth_att.vk_format) {
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D16_UNORM_S8_UINT:
return D16_UNORM;
case VK_FORMAT_X8_D24_UNORM_PACK32:
case VK_FORMAT_D24_UNORM_S8_UINT:
return D24_UNORM_X8_UINT;
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return D32_FLOAT;
default:
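      /* No depth attachment (VK_FORMAT_UNDEFINED) or an unhandled format;
       * fall back to D16_UNORM.
       */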
return D16_UNORM;
}
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
      /* Take dynamic primitive topology into account with
* 3DSTATE_SF::MultisampleRasterizationMode
*/
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
uint32_t ms_rast_mode =
genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode);
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
pipeline->line_mode);
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
.DepthBufferSurfaceFormat = get_depth_format(cmd_buffer),
.LineWidth = dyn->rs.line.width,
.AntialiasingEnable = aa_enable,
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
.MultisampleRasterizationMode = ms_rast_mode,
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
};
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx7.sf);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
}
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
ls.LineStippleInverseRepeatCount =
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
.DoubleSidedStencilEnable = true,
.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff,
.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff,
.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff,
.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff,
.DepthTestEnable = opt_ds.depth.test_enable,
.DepthBufferWriteEnable = opt_ds.depth.write_enable,
.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op],
.StencilTestEnable = opt_ds.stencil.test_enable,
.StencilBufferWriteEnable = opt_ds.stencil.write_enable,
.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail],
.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass],
.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail],
.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare],
.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail],
.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass],
.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail],
.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare],
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
struct anv_state ds_state =
anv_cmd_buffer_emit_dynamic(cmd_buffer, depth_stencil_dw,
sizeof(depth_stencil_dw), 64);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) {
dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset;
}
}
if (cmd_buffer->state.gfx.index_buffer &&
((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))) {
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
uint32_t offset = cmd_buffer->state.gfx.index_offset;
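      /* On Haswell (GFX_VERx10 == 75) the cut index lives in 3DSTATE_VF;
       * on Ivybridge it is programmed via 3DSTATE_INDEX_BUFFER below.
       */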
#if GFX_VERx10 == 75
anv_batch_emit(&cmd_buffer->batch, GFX75_3DSTATE_VF, vf) {
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
}
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GFX_VERx10 != 75
ib.CutIndexEnable = dyn->ia.primitive_restart_enable;
#endif
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferEndingAddress = anv_address_add(buffer->address,
buffer->vk.size);
}
}
   /* Re-emit 3DSTATE_WM in the hope that we can avoid spawning fragment
    * shader threads, or when dirty dynamic primitive topology state requires
    * toggling 3DSTATE_WM::MultisampleRasterizationMode dynamically.
    */
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
uint32_t dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
!anv_cmd_buffer_all_color_write_masked(cmd_buffer)),
.MultisampleRasterizationMode =
genX(ms_rasterization_mode)(pipeline,
dynamic_raster_mode),
};
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) {
const uint32_t samples = MAX2(1, cmd_buffer->state.gfx.samples);
const struct vk_sample_locations_state *sl = dyn->ms.sample_locations;
genX(emit_multisample)(&cmd_buffer->batch, samples,
sl->per_pixel == samples ? sl : NULL);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
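      /* One bit per render target; a cleared bit disables all channel writes
       * for that attachment in the BLEND_STATE entries below.
       */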
/* Blend states of each RT */
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
uint32_t *dws = blend_dws;
memset(blend_dws, 0, sizeof(blend_dws));
      /* Skip over the BLEND_STATE header; it comes from the pipeline's packed
       * state when the two are merged below.
       */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
/* Disable anything above the current number of color attachments. */
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
(color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_A_BIT) == 0,
.WriteDisableRed = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_R_BIT) == 0,
.WriteDisableGreen = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_G_BIT) == 0,
.WriteDisableBlue = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_B_BIT) == 0,
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
pipeline->gfx7.blend_state, num_dwords, 64);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = blend_states.offset;
}
}
/* When we're done, there is no more dirty gfx state. */
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer->state.gfx.dirty = 0;
}
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
bool enable)
{
/* The NP PMA fix doesn't exist on gfx7 */
}

View File

@@ -0,0 +1,706 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
if (cmd_buffer->state.pma_fix_enabled == enable)
return;
cmd_buffer->state.pma_fix_enabled = enable;
/* According to the Broadwell PIPE_CONTROL documentation, software should
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
* prior to the LRI. If stencil buffer writes are enabled, then a Render
* Cache Flush is also necessary.
*
* The Skylake docs say to use a depth stall rather than a command
* streamer stall. However, the hardware seems to violently disagree.
* A full command streamer stall seems to be needed in both cases.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthCacheFlushEnable = true;
pc.CommandStreamerStallEnable = true;
pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
pc.TileCacheFlushEnable = true;
/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
*/
pc.DepthStallEnable = true;
#endif
}
#if GFX_VER == 9
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
.STCPMAOptimizationEnable = enable,
.STCPMAOptimizationEnableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_0_num);
lri.DataDWord = cache_mode;
}
#elif GFX_VER == 8
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
.NPPMAFixEnable = enable,
.NPEarlyZFailsDisable = enable,
.NPPMAFixEnableMask = true,
.NPEarlyZFailsDisableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_1_num);
lri.DataDWord = cache_mode;
}
#endif /* GFX_VER == 8 */
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
*
* Again, the Skylake docs give a different set of flushes but the BDW
* flushes seem to work just as well.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthStallEnable = true;
pc.DepthCacheFlushEnable = true;
pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
pc.TileCacheFlushEnable = true;
#endif
}
}
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
assert(GFX_VER == 8);
/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
*
* SW must set this bit in order to enable this fix when following
* expression is TRUE.
*
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
* (3DSTATE_PS_EXTRA::PixelShaderValid) &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
if (!ds->depth.test_enable)
return false;
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
return (pipeline->kill_pixel && (ds->depth.write_enable ||
ds->stencil.write_enable)) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
if (GFX_VER > 9)
return false;
assert(GFX_VER == 9);
/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
*
* Clearing this bit will force the STC cache to wait for pending
* retirement of pixels at the HZ-read stage and do the STC-test for
* Non-promoted, R-computed and Computed depth modes instead of
* postponing the STC-test to RCPFE.
*
* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*
* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*
* COMP_STC_EN = STC_TEST_EN &&
* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
*
* SW parses the pipeline states to generate the following logical
* signal indicating if PMA FIX can be enabled.
*
* STC_PMA_OPT =
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
* !(3DSTATE_WM::EDSC_Mode == 2) &&
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (COMP_STC_EN || STC_WRITE_EN) &&
* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* We can't possibly know if HiZ is enabled without the depth attachment */
ASSERTED const struct anv_image_view *d_iview =
cmd_buffer->state.gfx.depth_att.iview;
assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == 2) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*/
const bool stc_test_en = ds->stencil.test_enable;
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*/
const bool stc_write_en = ds->stencil.write_enable;
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
/* COMP_STC_EN || STC_WRITE_EN */
if (!(comp_stc_en || stc_write_en))
return false;
/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
return pipeline->kill_pixel ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
#if GFX_VER >= 11
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
#endif /* GFX_VER >= 11 */
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
};
#if GFX_VER == 8
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
sf.CHVLineWidth = dyn->rs.line.width;
} else {
sf.LineWidth = dyn->rs.line.width;
}
#else
      sf.LineWidth = dyn->rs.line.width;
#endif
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
      /* Take dynamic primitive topology into account with
* 3DSTATE_RASTER::APIMode
* 3DSTATE_RASTER::DXMultisampleRasterizationEnable
* 3DSTATE_RASTER::AntialiasingEnable
*/
uint32_t api_mode = 0;
bool msaa_raster_enable = false;
VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->ia.primitive_topology);
genX(rasterization_mode)(dynamic_raster_mode,
pipeline->line_mode, dyn->rs.line.width,
&api_mode, &msaa_raster_enable);
bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
pipeline->line_mode);
uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
.APIMode = api_mode,
.DXMultisampleRasterizationEnable = msaa_raster_enable,
.AntialiasingEnable = aa_enable,
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
.GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
.GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
.GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
.GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
};
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
pipeline->gfx8.raster);
}
/* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits get split
* across different state packets for gfx8 and gfx9. We handle that by
* using a big old #if switch here.
*/
#if GFX_VER == 8
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#else
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#endif
#if GFX_VER >= 12
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
}
}
#endif
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
ls.LineStippleInverseRepeatCount =
1.0f / MAX2(1, dyn->rs.line.stipple.factor);
ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
vf.GeometryDistributionEnable = true;
#endif
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
}
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
uint32_t offset = cmd_buffer->state.gfx.index_offset;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
ib.L3BypassDisable = true;
#endif
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
VK_WHOLE_SIZE);
}
}
#if GFX_VERx10 >= 125
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
         /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */
vfg.DistributionMode =
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
RR_FREE;
vfg.DistributionGranularity = BatchLevelGranularity;
/* Wa_14014890652 */
if (intel_device_info_is_dg2(cmd_buffer->device->info))
vfg.GranularityThresholdDisable = 1;
vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
/* 192 vertices for TRILIST_ADJ */
vfg.ListNBatchSizeScale = 0;
/* Batch size of 384 vertices */
vfg.List3BatchSizeScale = 2;
/* Batch size of 128 vertices */
vfg.List2BatchSizeScale = 1;
/* Batch size of 128 vertices */
vfg.List1BatchSizeScale = 2;
/* Batch size of 256 vertices for STRIP topologies */
vfg.StripBatchSizeScale = 3;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeScale = 1;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeMultiplier = 31;
}
}
#endif
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
      /* Re-emit 3DSTATE_WM in the hope that we can avoid spawning fragment
       * shader threads.
       */
uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
ForceON : 0,
};
GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
bool has_writeable_rt =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(color_writes & ((1u << state->color_att_count) - 1)) != 0;
      /* Emit 3DSTATE_PS_BLEND so it stays consistent with the rest of the
       * BLEND_STATE_ENTRY setup below.
       */
uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
struct GENX(3DSTATE_PS_BLEND) ps_blend = {
GENX(3DSTATE_PS_BLEND_header),
.HasWriteableRT = has_writeable_rt,
};
GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
pipeline->gfx8.ps_blend);
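/* Build a fresh BLEND_STATE table with the dynamic per-RT write enables
* and merge it with the pipeline's packed blend state.
*/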
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
uint32_t *dws = blend_dws;
memset(blend_dws, 0, sizeof(blend_dws));
/* Skip over the BLEND_STATE header dwords; the per-RT entries are packed after it. */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
/* Disable anything above the current number of color attachments. */
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
(color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_A_BIT) == 0,
.WriteDisableRed = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_R_BIT) == 0,
.WriteDisableGreen = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_G_BIT) == 0,
.WriteDisableBlue = write_disabled ||
(pipeline->color_comp_writes[i] &
VK_COLOR_COMPONENT_B_BIT) == 0,
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
pipeline->gfx8.blend_state, num_dwords, 64);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = blend_states.offset;
bsp.BlendStatePointerValid = true;
}
}
/* When we're done, there is no more dirty gfx state. */
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer->state.gfx.dirty = 0;
}

View File

@ -0,0 +1,265 @@
# Copyright © 2017-2019 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
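# Generate anv_entrypoints.{h,c} from the Vulkan registry XML, including
# entrypoint variants for each hardware generation prefix listed below.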
anv_hasvk_entrypoints = custom_target(
'anv_hasvk_entrypoints',
input : [vk_entrypoints_gen, vk_api_xml],
output : ['anv_entrypoints.h', 'anv_entrypoints.c'],
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'anv',
'--device-prefix', 'gfx7', '--device-prefix', 'gfx75',
'--device-prefix', 'gfx8', '--device-prefix', 'gfx9',
'--device-prefix', 'gfx11', '--device-prefix', 'gfx12',
'--device-prefix', 'gfx125',
],
depend_files : vk_entrypoints_gen_depend_files,
)
intel_hasvk_icd = custom_target(
'intel_hasvk_icd',
input : [vk_icd_gen, vk_api_xml],
output : 'intel_hasvk_icd.@0@.json'.format(host_machine.cpu()),
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
'libvulkan_intel_hasvk.so'),
'--out', '@OUTPUT@',
],
build_by_default : true,
install_dir : with_vulkan_icd_dir,
install : true,
)
if meson.version().version_compare('>= 0.58')
_dev_icdname = 'intel_hasvk_devenv_icd.@0@.json'.format(host_machine.cpu())
custom_target(
'intel_hasvk_devenv_icd',
input : [vk_icd_gen, vk_api_xml],
output : _dev_icdname,
command : [
prog_python, '@INPUT0@',
'--api-version', '1.3', '--xml', '@INPUT1@',
'--lib-path', meson.current_build_dir() / 'libvulkan_intel_hasvk.so',
'--out', '@OUTPUT@',
],
build_by_default : true,
)
devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
endif
libanv_per_hw_ver_libs = []
anv_per_hw_ver_files = files(
'genX_blorp_exec.c',
'genX_cmd_buffer.c',
'genX_gpu_memcpy.c',
'genX_pipeline.c',
'genX_query.c',
'genX_state.c',
)
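# Compile the genX_* sources once per supported GFX_VERx10 value; each
# per-generation static library is built with its own -DGFX_VERx10 define.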
foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
['80', ['gfx8_cmd_buffer.c']], ['90', ['gfx8_cmd_buffer.c']],
['110', ['gfx8_cmd_buffer.c']], ['120', ['gfx8_cmd_buffer.c']],
['125', ['gfx8_cmd_buffer.c']]]
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
c_args : [
no_override_init_args, c_sse2_args,
'-DGFX_VERx10=@0@'.format(_gfx_ver),
],
gnu_symbol_visibility : 'hidden',
dependencies : [
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
],
)
endforeach
libanv_files = files(
'anv_acceleration_structure.c',
'anv_allocator.c',
'anv_android.h',
'anv_batch_chain.c',
'anv_blorp.c',
'anv_bo_sync.c',
'anv_cmd_buffer.c',
'anv_descriptor_set.c',
'anv_device.c',
'anv_formats.c',
'anv_genX.h',
'anv_image.c',
'anv_measure.c',
'anv_measure.h',
'anv_nir.h',
'anv_nir_add_base_work_group_id.c',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_multiview.c',
'anv_nir_lower_ubo_loads.c',
'anv_nir_lower_ycbcr_textures.c',
'anv_perf.c',
'anv_pipeline.c',
'anv_pipeline_cache.c',
'anv_private.h',
'anv_queue.c',
'anv_util.c',
'anv_utrace.c',
'anv_wsi.c',
)
anv_deps = [
dep_libdrm,
dep_valgrind,
idep_genxml,
idep_nir_headers,
idep_vulkan_util_headers,
idep_vulkan_runtime_headers,
idep_vulkan_wsi_headers,
]
anv_flags = [
no_override_init_args,
c_sse2_args,
]
anv_cpp_flags = []
if with_platform_x11
anv_deps += dep_xcb_dri3
endif
if with_platform_wayland
anv_deps += dep_wayland_client
endif
if with_xlib_lease
anv_deps += [dep_xlib_xrandr]
endif
if with_platform_android
libanv_files += files('anv_android.c')
else
libanv_files += files('anv_android_stubs.c')
endif
anv_deps += idep_intel_driver_ds_headers
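# Generation-independent driver code, linked whole into the final shared
# library and into the test library below.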
libanv_hasvk_common = static_library(
'anv_hasvk_common',
[
libanv_files, anv_hasvk_entrypoints, sha1_h,
gen_xml_pack,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
inc_util,
],
c_args : anv_flags,
cpp_args : anv_cpp_flags,
gnu_symbol_visibility : 'hidden',
dependencies : anv_deps,
)
libvulkan_intel_hasvk = shared_library(
'vulkan_intel_hasvk',
[files('anv_gem.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : [libanv_hasvk_common, libanv_per_hw_ver_libs],
link_with : [
libintel_compiler, libintel_dev, libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
idep_intel_driver_ds,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
install : true,
)
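# Check the exported symbols of the built driver against the shared Vulkan
# ICD symbol list.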
if with_symbols_check
test(
'anv symbols check',
symbols_check,
args : [
'--lib', libvulkan_intel_hasvk,
'--symbols-file', vulkan_icd_symbols,
symbols_check_args,
],
suite : ['intel'],
)
endif
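# The test build swaps in anv_gem_stubs.c for the real GEM backend so the
# allocator unit tests below can run without an Intel GPU.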
if with_tests
libvulkan_intel_hasvk_test = static_library(
'vulkan_intel_hasvk_test',
[files('anv_gem_stubs.c'), anv_hasvk_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
link_whole : libanv_hasvk_common,
link_with : [
libanv_per_hw_ver_libs, libintel_compiler, libintel_common, libintel_dev,
libisl, libblorp, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
idep_mesautil,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
)
foreach t : ['block_pool_no_free', 'block_pool_grow_first',
'state_pool_no_free', 'state_pool_free_list_only',
'state_pool', 'state_pool_padding']
test(
'anv_hasvk_@0@'.format(t),
executable(
t,
['tests/@0@.c'.format(t), anv_hasvk_entrypoints[0]],
c_args : [ c_sse2_args ],
link_with : libvulkan_intel_hasvk_test,
dependencies : [
dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
idep_vulkan_runtime, idep_intel_driver_ds,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
],
),
suite : ['intel'],
)
endforeach
endif

View File

@ -0,0 +1,67 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "test_common.h"
int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_block_pool pool;
/* Create a pool with initial size smaller than the block allocated, so
* that it must grow in the first allocation.
*/
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, initial_size);
ASSERT(pool.size == initial_size);
uint32_t padding;
int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding);
/* The pool must have grown by at least enough space to fit the new allocation. */
ASSERT(pool.size > initial_size);
ASSERT(pool.size >= initial_size + block_size);
/* The whole initial size is considered padding and the allocation should be
* right next to it.
*/
ASSERT(padding == initial_size);
ASSERT(offset == initial_size);
/* Use the memory to ensure it is valid. */
void *map = anv_block_pool_map(&pool, offset, block_size);
memset(map, 22, block_size);
anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,153 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 16
#define BLOCKS_PER_THREAD 1024
#define NUM_RUNS 64
struct job {
pthread_t thread;
unsigned id;
struct anv_block_pool *pool;
int32_t blocks[BLOCKS_PER_THREAD];
int32_t back_blocks[BLOCKS_PER_THREAD];
} jobs[NUM_THREADS];
static void *alloc_blocks(void *_job)
{
struct job *job = _job;
uint32_t job_id = job - jobs;
uint32_t block_size = 16 * ((job_id % 4) + 1);
int32_t block, *data;
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = anv_block_pool_alloc(job->pool, block_size, NULL);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block >= 0);
job->blocks[i] = block;
block = anv_block_pool_alloc_back(job->pool, block_size);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block < 0);
job->back_blocks[i] = -block;
}
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
block = job->blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);
block = -job->back_blocks[i];
data = anv_block_pool_map(job->pool, block, block_size);
ASSERT(*data == block);
}
return NULL;
}
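/* Check that, when the per-thread offset lists are merged in order, the
* offsets handed out by the pool are strictly increasing.
*/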
static void validate_monotonic(int32_t **blocks)
{
/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));
int highest = -1;
while (true) {
/* First, we find which thread has the lowest next element */
int32_t thread_min = INT32_MAX;
int min_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= BLOCKS_PER_THREAD)
continue;
if (thread_min > blocks[i][next[i]]) {
thread_min = blocks[i][next[i]];
min_thread_idx = i;
}
}
/* The only way this can happen is if all of the next[] values are at
* BLOCKS_PER_THREAD, in which case, we're done.
*/
if (thread_min == INT32_MAX)
break;
/* That next element had better be higher than the previous highest */
ASSERT(blocks[min_thread_idx][next[min_thread_idx]] > highest);
highest = blocks[min_thread_idx][next[min_thread_idx]];
next[min_thread_idx]++;
}
}
static void run_test()
{
struct anv_physical_device physical_device = {
.use_relocations = true,
};
struct anv_device device = {};
struct anv_block_pool pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_block_pool_init(&pool, &device, "test", 4096, 4096);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
/* Validate that the block allocations were monotonic */
int32_t *block_ptrs[NUM_THREADS];
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].blocks;
validate_monotonic(block_ptrs);
/* Validate that the back block allocations were monotonic */
for (unsigned i = 0; i < NUM_THREADS; i++)
block_ptrs[i] = jobs[i].back_blocks;
validate_monotonic(block_ptrs);
anv_block_pool_finish(&pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}
int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

View File

@ -0,0 +1,59 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 10
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)
#define NUM_RUNS 64
#include "state_pool_test_helper.h"
int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
for (unsigned i = 0; i < NUM_RUNS; i++) {
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 256);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
run_state_pool_test(&state_pool);
anv_state_pool_finish(&state_pool);
}
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,68 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 8
#define STATES_PER_THREAD_LOG2 12
#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2)
#include "state_pool_test_helper.h"
int main(void)
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
/* Grab and return enough states that the state pool test below won't
* actually ever resize anything.
*/
{
struct anv_state states[NUM_THREADS * STATES_PER_THREAD];
for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) {
states[i] = anv_state_pool_alloc(&state_pool, 16, 16);
ASSERT(states[i].offset != 0);
}
for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++)
anv_state_pool_free(&state_pool, states[i]);
}
run_state_pool_test(&state_pool);
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,119 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 16
#define STATES_PER_THREAD 1024
#define NUM_RUNS 64
struct job {
pthread_t thread;
unsigned id;
struct anv_state_pool *pool;
uint32_t offsets[STATES_PER_THREAD];
} jobs[NUM_THREADS];
pthread_barrier_t barrier;
static void *alloc_states(void *_job)
{
struct job *job = _job;
pthread_barrier_wait(&barrier);
for (unsigned i = 0; i < STATES_PER_THREAD; i++) {
struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16);
job->offsets[i] = state.offset;
}
return NULL;
}
static void run_test()
{
struct anv_physical_device physical_device = { };
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 64);
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
/* A list of indices, one per thread */
unsigned next[NUM_THREADS];
memset(next, 0, sizeof(next));
int highest = -1;
while (true) {
/* First, we find which thread has the highest next element */
int thread_max = -1;
int max_thread_idx = -1;
for (unsigned i = 0; i < NUM_THREADS; i++) {
if (next[i] >= STATES_PER_THREAD)
continue;
if (thread_max < jobs[i].offsets[next[i]]) {
thread_max = jobs[i].offsets[next[i]];
max_thread_idx = i;
}
}
/* The only way this can happen is if all of the next[] values are at
* STATES_PER_THREAD, in which case, we're done.
*/
if (thread_max == -1)
break;
/* That next element had better be higher than the previous highest */
ASSERT(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest);
highest = jobs[max_thread_idx].offsets[next[max_thread_idx]];
next[max_thread_idx]++;
}
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}
int main(void)
{
for (unsigned i = 0; i < NUM_RUNS; i++)
run_test();
}

View File

@ -0,0 +1,79 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "test_common.h"
int main(void)
{
struct anv_physical_device physical_device = {
.use_softpin = true,
};
struct anv_device device = {};
struct anv_state_pool state_pool;
anv_device_set_physical(&device, &physical_device);
pthread_mutex_init(&device.mutex, NULL);
anv_bo_cache_init(&device.bo_cache, &device);
anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
/* Get the size of the underlying block_pool */
struct anv_block_pool *bp = &state_pool.block_pool;
uint64_t pool_size = bp->size;
/* Grab one so the pool has some initial usage */
anv_state_pool_alloc(&state_pool, 16, 16);
/* Grab a state that is the size of the initial allocation */
struct anv_state state = anv_state_pool_alloc(&state_pool, pool_size, 16);
/* The pool must have grown */
ASSERT(bp->size > pool_size);
/* And the state must have been allocated right at the end of the original pool size */
ASSERT(state.offset == pool_size);
/* A new allocation that fits into the returned empty space should have an
* offset within the original pool size
*/
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);
/* We should be able to allocate block-sized (4096 byte) chunks in the
* returned area.
*/
int left_chunks = pool_size / 4096 - 2;
for (int i = 0; i < left_chunks; i++) {
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(state.offset + state.alloc_size <= pool_size);
}
/* Now the next chunk to be allocated should make the pool grow again */
pool_size = bp->size;
state = anv_state_pool_alloc(&state_pool, 4096, 16);
ASSERT(bp->size > pool_size);
ASSERT(state.offset == pool_size);
anv_state_pool_finish(&state_pool);
anv_bo_cache_finish(&device.bo_cache);
pthread_mutex_destroy(&device.mutex);
}

View File

@ -0,0 +1,71 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
struct job {
struct anv_state_pool *pool;
unsigned id;
pthread_t thread;
} jobs[NUM_THREADS];
pthread_barrier_t barrier;
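/* Each thread allocates a chunk of 16-byte states, writes to them, then
* frees the whole chunk; the number of states per chunk varies with the
* thread id.
*/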
static void *alloc_states(void *void_job)
{
struct job *job = void_job;
const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2);
const unsigned num_chunks = STATES_PER_THREAD / chunk_size;
struct anv_state states[chunk_size];
pthread_barrier_wait(&barrier);
for (unsigned c = 0; c < num_chunks; c++) {
for (unsigned i = 0; i < chunk_size; i++) {
states[i] = anv_state_pool_alloc(job->pool, 16, 16);
memset(states[i].map, 139, 16);
ASSERT(states[i].offset != 0);
}
for (unsigned i = 0; i < chunk_size; i++)
anv_state_pool_free(job->pool, states[i]);
}
return NULL;
}
static void run_state_pool_test(struct anv_state_pool *state_pool)
{
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = state_pool;
jobs[i].id = i;
pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
}
for (unsigned i = 0; i < NUM_THREADS; i++)
pthread_join(jobs[i].thread, NULL);
}

View File

@ -0,0 +1,34 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
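/* Minimal assertion helper shared by the allocator tests: print the failing
* condition with file and line, then abort.
*/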
#define ASSERT(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "%s:%d: Test assertion `%s` failed.\n", \
__FILE__, __LINE__, # cond); \
abort(); \
} \
} while (false)