aco: Remove VS inputs from visit_load_input.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
This commit is contained in:
Timur Kristóf 2023-02-02 17:55:06 +01:00 committed by Marge Bot
parent 27c8131978
commit c602092033

View File

@ -313,58 +313,6 @@ as_vgpr(isel_context* ctx, Temp val)
return as_vgpr(bld, val);
}
/* Emit instructions computing dst = a / b for a compile-time-constant unsigned
 * 32-bit divisor b, using the precomputed fast-udiv sequence
 * (optional pre-shift, optional increment, mul-hi, optional post-shift).
 *
 * Assumes a != 0xffffffff (otherwise the increment step could overflow).
 *
 * NOTE(review): the original code carried a hard-coded `bool multiply = true;`,
 * which made the "copy a to dst" early-return branch unreachable and the
 * `multiply` terms in the temp-selection conditions always true. That dead
 * code is removed here; the emitted instruction sequence is unchanged.
 */
void
emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
{
   assert(b != 0);
   Builder bld(ctx->program, ctx->block);

   /* A power-of-two divisor reduces to a single logical shift right. */
   if (util_is_power_of_two_or_zero(b)) {
      bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(util_logbase2(b)), a);
      return;
   }

   util_fast_udiv_info info = util_compute_fast_udiv_info(b, 32, 32);
   assert(info.multiplier <= 0xffffffff);

   const bool pre_shift = info.pre_shift != 0;
   const bool increment = info.increment != 0;
   const bool post_shift = info.post_shift != 0;

   /* The mul-hi step is always required for a non-power-of-two divisor, so
    * every intermediate before it must land in a fresh temporary; only the
    * final instruction of the sequence may write dst directly.
    */
   Temp pre_shift_dst = a;
   if (pre_shift) {
      pre_shift_dst = bld.tmp(v1);
      bld.vop2(aco_opcode::v_lshrrev_b32, Definition(pre_shift_dst), Operand::c32(info.pre_shift),
               a);
   }

   Temp increment_dst = pre_shift_dst;
   if (increment) {
      increment_dst = bld.tmp(v1);
      bld.vadd32(Definition(increment_dst), Operand::c32(info.increment), pre_shift_dst);
   }

   /* multiply_dst = (increment_dst * multiplier) >> 32 */
   Temp multiply_dst = post_shift ? bld.tmp(v1) : dst;
   bld.vop3(aco_opcode::v_mul_hi_u32, Definition(multiply_dst), increment_dst,
            bld.copy(bld.def(v1), Operand::c32(info.multiplier)));

   if (post_shift) {
      bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(info.post_shift),
               multiply_dst);
   }
}
void
emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
{
@ -5632,216 +5580,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
nir_src offset = *nir_get_io_offset_src(instr);
if (ctx->shader->info.stage == MESA_SHADER_VERTEX) {
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
isel_err(offset.ssa->parent_instr,
"Unimplemented non-zero nir_intrinsic_load_input offset");
Temp vertex_buffers =
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->vertex_buffers));
unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
unsigned bitsize = instr->dest.ssa.bit_size;
unsigned component = nir_intrinsic_component(instr) >> (bitsize == 64 ? 1 : 0);
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
enum pipe_format attrib_format =
(enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location];
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
const struct ac_vtx_format_info* vtx_info =
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
unsigned desc_index =
ctx->program->info.vs.use_per_attribute_vb_descs ? location : attrib_binding;
desc_index = util_bitcount(ctx->program->info.vs.vb_desc_usage_mask &
u_bit_consecutive(0, desc_index));
Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u));
Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
Temp index;
if (ctx->options->key.vs.instance_rate_inputs & (1u << location)) {
uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[location];
Temp start_instance = get_arg(ctx, ctx->args->start_instance);
if (divisor) {
Temp instance_id = get_arg(ctx, ctx->args->instance_id);
if (divisor != 1) {
Temp divided = bld.tmp(v1);
emit_v_div_u32(ctx, divided, as_vgpr(ctx, instance_id), divisor);
index = bld.vadd32(bld.def(v1), start_instance, divided);
} else {
index = bld.vadd32(bld.def(v1), start_instance, instance_id);
}
} else {
index = bld.copy(bld.def(v1), start_instance);
}
} else {
index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->base_vertex),
get_arg(ctx, ctx->args->vertex_id));
}
Temp* const channels = (Temp*)alloca(num_channels * sizeof(Temp));
unsigned channel_start = 0;
bool direct_fetch = false;
/* skip unused channels at the start */
if (vtx_info->chan_byte_size) {
channel_start = ffs(mask) - 1;
for (unsigned i = 0; i < MIN2(channel_start, num_channels); i++)
channels[i] = Temp(0, s1);
}
/* load channels */
while (channel_start < num_channels) {
unsigned fetch_component = num_channels - channel_start;
unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size;
/* use MUBUF when possible to avoid possible alignment issues */
/* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16;
unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
if (!use_mubuf) {
fetch_component = ac_get_safe_fetch_size(ctx->program->gfx_level, vtx_info, fetch_offset,
vtx_info->num_channels - channel_start, binding_align,
fetch_component);
fetch_fmt = vtx_info->hw_format[fetch_component - 1];
} else {
/* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */
if (fetch_component == 3 && ctx->options->gfx_level == GFX6)
fetch_component = 2;
}
unsigned fetch_bytes = fetch_component * bitsize / 8;
Temp fetch_index = index;
if (attrib_stride != 0 && fetch_offset > attrib_stride) {
fetch_index =
bld.vadd32(bld.def(v1), Operand::c32(fetch_offset / attrib_stride), fetch_index);
fetch_offset = fetch_offset % attrib_stride;
}
Operand soffset = Operand::zero();
if (fetch_offset >= 4096) {
soffset = bld.copy(bld.def(s1), Operand::c32(fetch_offset / 4096 * 4096));
fetch_offset %= 4096;
}
aco_opcode opcode;
switch (fetch_bytes) {
case 2:
assert(!use_mubuf && bitsize == 16);
opcode = aco_opcode::tbuffer_load_format_d16_x;
break;
case 4:
if (bitsize == 16) {
assert(!use_mubuf);
opcode = aco_opcode::tbuffer_load_format_d16_xy;
} else {
opcode =
use_mubuf ? aco_opcode::buffer_load_dword : aco_opcode::tbuffer_load_format_x;
}
break;
case 6:
assert(!use_mubuf && bitsize == 16);
opcode = aco_opcode::tbuffer_load_format_d16_xyz;
break;
case 8:
if (bitsize == 16) {
assert(!use_mubuf);
opcode = aco_opcode::tbuffer_load_format_d16_xyzw;
} else {
opcode =
use_mubuf ? aco_opcode::buffer_load_dwordx2 : aco_opcode::tbuffer_load_format_xy;
}
break;
case 12:
assert(ctx->options->gfx_level >= GFX7 ||
(!use_mubuf && ctx->options->gfx_level == GFX6));
opcode =
use_mubuf ? aco_opcode::buffer_load_dwordx3 : aco_opcode::tbuffer_load_format_xyz;
break;
case 16:
opcode =
use_mubuf ? aco_opcode::buffer_load_dwordx4 : aco_opcode::tbuffer_load_format_xyzw;
break;
default: unreachable("Unimplemented load_input vector size");
}
Temp fetch_dst;
if (channel_start == 0 && fetch_bytes == dst.bytes()) {
direct_fetch = true;
fetch_dst = dst;
} else {
fetch_dst = bld.tmp(RegClass::get(RegType::vgpr, fetch_bytes));
}
if (use_mubuf) {
bld.mubuf(opcode, Definition(fetch_dst), list, fetch_index,
soffset, fetch_offset, false, false, true);
} else {
unsigned dfmt = fetch_fmt & 0xf;
unsigned nfmt = fetch_fmt >> 4;
bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
soffset, dfmt, nfmt, fetch_offset, false, true);
}
emit_split_vector(ctx, fetch_dst, fetch_dst.bytes() * 8 / bitsize);
if (fetch_component == 1) {
channels[channel_start] = fetch_dst;
} else {
for (unsigned i = 0; i < MIN2(fetch_component, num_channels - channel_start); i++)
channels[channel_start + i] = emit_extract_vector(
ctx, fetch_dst, i, RegClass::get(RegType::vgpr, bitsize / 8u));
}
channel_start += fetch_component;
}
if (!direct_fetch) {
bool is_float =
nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float;
unsigned num_components = instr->dest.ssa.num_components;
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
unsigned num_temp = 0;
for (unsigned i = 0; i < num_components; i++) {
unsigned idx = i + component;
if (idx < num_channels && channels[idx].id()) {
Temp channel = channels[idx];
vec->operands[i] = Operand(channel);
num_temp++;
elems[i] = channel;
} else if (bitsize == 64) {
/* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
* For 64-bit data types, no default attribute values are provided. Input variables
* must not use more components than provided by the attribute.
*/
vec->operands[i] = Operand(v2);
} else if (is_float && idx == 3) {
vec->operands[i] = bitsize == 16 ? Operand::c16(0x3c00u) : Operand::c32(0x3f800000u);
} else if (!is_float && idx == 3) {
vec->operands[i] = Operand::get_const(ctx->options->gfx_level, 1u, bitsize / 8u);
} else {
vec->operands[i] = Operand::zero(bitsize / 8u);
}
}
vec->definitions[0] = Definition(dst);
ctx->block->instructions.emplace_back(std::move(vec));
emit_split_vector(ctx, dst, num_components);
if (num_temp == num_components)
ctx->allocated_vec.emplace(dst.id(), elems);
}
} else if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
isel_err(offset.ssa->parent_instr,
"Unimplemented non-zero nir_intrinsic_load_input offset");