radeonsi: fix incorrect comments in culling code and NIR lowering

The lowering code removes the "VS inputs" item from the list because the hw doesn't support indirect indexing of VS inputs.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10261>
2024-12-03 07:04:02 +08:00 · 2021-04-11 21:57:10 -04:00 · 2021-04-11 21:57:10 -04:00 · a0771e6b27
commit a0771e6b27
parent e0ffd1f928
2 changed files with 6 additions and 10 deletions
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@ -844,9 +844,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
    * Part 2: Compact ES threads in GS threads:
    * - Compute the prefix sum for all 3 vertices from the masks. These are the new
    *   thread IDs for each vertex within the primitive.
-    * - Write the value of the old thread ID into the LDS address of the new thread ID.
-    *   The ES thread will load the old thread ID and use it to load the position, VertexID,
-    *   and InstanceID.
+    * - Write input VGPRs and vertex positions into the LDS address of the new thread ID.
    * - Update vertex indices and null flag in the GS input VGPRs.
    * - Barrier
    *
@ -857,7 +855,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out

   LLVMValueRef vtxindex[3];
   if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) {
-      /* For the GS fast launch, the VS prologs simply puts the Vertex IDs
+      /* For the GS fast launch, the VS prolog simply puts the Vertex IDs
       * into these VGPRs.
       */
      vtxindex[0] = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
@ -981,9 +979,9 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
                           (sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id);

   /* ES threads compute their prefix sum, which is the new ES thread ID.
-    * Then they write the value of the old thread ID into the LDS address
-    * of the new thread ID. It will be used it to load input VGPRs from
-    * the old thread's LDS location.
+    * Then they write the vertex position and input VGPRs into the LDS address
+    * of the new thread ID. It will be used to load input VGPRs by compacted
+    * threads.
    */
   ac_build_ifcc(&ctx->ac, LLVMBuildLoad(builder, es_accepted, ""), 16009);
   {
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@ -712,15 +712,13 @@ static void si_lower_io(struct nir_shader *nir)
 {
   /* HW supports indirect indexing for: | Enabled in driver
    * -------------------------------------------------------
-    * VS inputs                          | No
    * TCS inputs                         | Yes
    * TES inputs                         | Yes
    * GS inputs                          | No
    * -------------------------------------------------------
    * VS outputs before TCS              | No
-    * VS outputs before GS               | No
    * TCS outputs                        | Yes
-    * TES outputs before GS              | No
+    * VS/TES outputs before GS           | No
    */
   bool has_indirect_inputs = nir->info.stage == MESA_SHADER_TESS_CTRL ||
                              nir->info.stage == MESA_SHADER_TESS_EVAL;