mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-12-02 22:54:05 +08:00
aco: Remove VS inputs from visit_load_input.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
This commit is contained in:
parent
27c8131978
commit
c602092033
@ -313,58 +313,6 @@ as_vgpr(isel_context* ctx, Temp val)
|
||||
return as_vgpr(bld, val);
|
||||
}
|
||||
|
||||
// assumes a != 0xffffffff
|
||||
void
|
||||
emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
|
||||
{
|
||||
assert(b != 0);
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
if (util_is_power_of_two_or_zero(b)) {
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(util_logbase2(b)), a);
|
||||
return;
|
||||
}
|
||||
|
||||
util_fast_udiv_info info = util_compute_fast_udiv_info(b, 32, 32);
|
||||
|
||||
assert(info.multiplier <= 0xffffffff);
|
||||
|
||||
bool pre_shift = info.pre_shift != 0;
|
||||
bool increment = info.increment != 0;
|
||||
bool multiply = true;
|
||||
bool post_shift = info.post_shift != 0;
|
||||
|
||||
if (!pre_shift && !increment && !multiply && !post_shift) {
|
||||
bld.copy(Definition(dst), a);
|
||||
return;
|
||||
}
|
||||
|
||||
Temp pre_shift_dst = a;
|
||||
if (pre_shift) {
|
||||
pre_shift_dst = (increment || multiply || post_shift) ? bld.tmp(v1) : dst;
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, Definition(pre_shift_dst), Operand::c32(info.pre_shift),
|
||||
a);
|
||||
}
|
||||
|
||||
Temp increment_dst = pre_shift_dst;
|
||||
if (increment) {
|
||||
increment_dst = (post_shift || multiply) ? bld.tmp(v1) : dst;
|
||||
bld.vadd32(Definition(increment_dst), Operand::c32(info.increment), pre_shift_dst);
|
||||
}
|
||||
|
||||
Temp multiply_dst = increment_dst;
|
||||
if (multiply) {
|
||||
multiply_dst = post_shift ? bld.tmp(v1) : dst;
|
||||
bld.vop3(aco_opcode::v_mul_hi_u32, Definition(multiply_dst), increment_dst,
|
||||
bld.copy(bld.def(v1), Operand::c32(info.multiplier)));
|
||||
}
|
||||
|
||||
if (post_shift) {
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(info.post_shift),
|
||||
multiply_dst);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
|
||||
{
|
||||
@ -5632,216 +5580,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
nir_src offset = *nir_get_io_offset_src(instr);
|
||||
|
||||
if (ctx->shader->info.stage == MESA_SHADER_VERTEX) {
|
||||
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
|
||||
isel_err(offset.ssa->parent_instr,
|
||||
"Unimplemented non-zero nir_intrinsic_load_input offset");
|
||||
|
||||
Temp vertex_buffers =
|
||||
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->vertex_buffers));
|
||||
|
||||
unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
|
||||
unsigned bitsize = instr->dest.ssa.bit_size;
|
||||
unsigned component = nir_intrinsic_component(instr) >> (bitsize == 64 ? 1 : 0);
|
||||
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
|
||||
uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
|
||||
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
|
||||
enum pipe_format attrib_format =
|
||||
(enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location];
|
||||
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
|
||||
|
||||
const struct ac_vtx_format_info* vtx_info =
|
||||
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
|
||||
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
|
||||
|
||||
unsigned desc_index =
|
||||
ctx->program->info.vs.use_per_attribute_vb_descs ? location : attrib_binding;
|
||||
desc_index = util_bitcount(ctx->program->info.vs.vb_desc_usage_mask &
|
||||
u_bit_consecutive(0, desc_index));
|
||||
Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u));
|
||||
Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
|
||||
|
||||
Temp index;
|
||||
if (ctx->options->key.vs.instance_rate_inputs & (1u << location)) {
|
||||
uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[location];
|
||||
Temp start_instance = get_arg(ctx, ctx->args->start_instance);
|
||||
if (divisor) {
|
||||
Temp instance_id = get_arg(ctx, ctx->args->instance_id);
|
||||
if (divisor != 1) {
|
||||
Temp divided = bld.tmp(v1);
|
||||
emit_v_div_u32(ctx, divided, as_vgpr(ctx, instance_id), divisor);
|
||||
index = bld.vadd32(bld.def(v1), start_instance, divided);
|
||||
} else {
|
||||
index = bld.vadd32(bld.def(v1), start_instance, instance_id);
|
||||
}
|
||||
} else {
|
||||
index = bld.copy(bld.def(v1), start_instance);
|
||||
}
|
||||
} else {
|
||||
index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->base_vertex),
|
||||
get_arg(ctx, ctx->args->vertex_id));
|
||||
}
|
||||
|
||||
Temp* const channels = (Temp*)alloca(num_channels * sizeof(Temp));
|
||||
unsigned channel_start = 0;
|
||||
bool direct_fetch = false;
|
||||
|
||||
/* skip unused channels at the start */
|
||||
if (vtx_info->chan_byte_size) {
|
||||
channel_start = ffs(mask) - 1;
|
||||
for (unsigned i = 0; i < MIN2(channel_start, num_channels); i++)
|
||||
channels[i] = Temp(0, s1);
|
||||
}
|
||||
|
||||
/* load channels */
|
||||
while (channel_start < num_channels) {
|
||||
unsigned fetch_component = num_channels - channel_start;
|
||||
unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size;
|
||||
|
||||
/* use MUBUF when possible to avoid possible alignment issues */
|
||||
/* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
|
||||
bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16;
|
||||
unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||
if (!use_mubuf) {
|
||||
fetch_component = ac_get_safe_fetch_size(ctx->program->gfx_level, vtx_info, fetch_offset,
|
||||
vtx_info->num_channels - channel_start, binding_align,
|
||||
fetch_component);
|
||||
fetch_fmt = vtx_info->hw_format[fetch_component - 1];
|
||||
} else {
|
||||
/* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */
|
||||
if (fetch_component == 3 && ctx->options->gfx_level == GFX6)
|
||||
fetch_component = 2;
|
||||
}
|
||||
|
||||
unsigned fetch_bytes = fetch_component * bitsize / 8;
|
||||
|
||||
Temp fetch_index = index;
|
||||
if (attrib_stride != 0 && fetch_offset > attrib_stride) {
|
||||
fetch_index =
|
||||
bld.vadd32(bld.def(v1), Operand::c32(fetch_offset / attrib_stride), fetch_index);
|
||||
fetch_offset = fetch_offset % attrib_stride;
|
||||
}
|
||||
|
||||
Operand soffset = Operand::zero();
|
||||
if (fetch_offset >= 4096) {
|
||||
soffset = bld.copy(bld.def(s1), Operand::c32(fetch_offset / 4096 * 4096));
|
||||
fetch_offset %= 4096;
|
||||
}
|
||||
|
||||
aco_opcode opcode;
|
||||
switch (fetch_bytes) {
|
||||
case 2:
|
||||
assert(!use_mubuf && bitsize == 16);
|
||||
opcode = aco_opcode::tbuffer_load_format_d16_x;
|
||||
break;
|
||||
case 4:
|
||||
if (bitsize == 16) {
|
||||
assert(!use_mubuf);
|
||||
opcode = aco_opcode::tbuffer_load_format_d16_xy;
|
||||
} else {
|
||||
opcode =
|
||||
use_mubuf ? aco_opcode::buffer_load_dword : aco_opcode::tbuffer_load_format_x;
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
assert(!use_mubuf && bitsize == 16);
|
||||
opcode = aco_opcode::tbuffer_load_format_d16_xyz;
|
||||
break;
|
||||
case 8:
|
||||
if (bitsize == 16) {
|
||||
assert(!use_mubuf);
|
||||
opcode = aco_opcode::tbuffer_load_format_d16_xyzw;
|
||||
} else {
|
||||
opcode =
|
||||
use_mubuf ? aco_opcode::buffer_load_dwordx2 : aco_opcode::tbuffer_load_format_xy;
|
||||
}
|
||||
break;
|
||||
case 12:
|
||||
assert(ctx->options->gfx_level >= GFX7 ||
|
||||
(!use_mubuf && ctx->options->gfx_level == GFX6));
|
||||
opcode =
|
||||
use_mubuf ? aco_opcode::buffer_load_dwordx3 : aco_opcode::tbuffer_load_format_xyz;
|
||||
break;
|
||||
case 16:
|
||||
opcode =
|
||||
use_mubuf ? aco_opcode::buffer_load_dwordx4 : aco_opcode::tbuffer_load_format_xyzw;
|
||||
break;
|
||||
default: unreachable("Unimplemented load_input vector size");
|
||||
}
|
||||
|
||||
Temp fetch_dst;
|
||||
if (channel_start == 0 && fetch_bytes == dst.bytes()) {
|
||||
direct_fetch = true;
|
||||
fetch_dst = dst;
|
||||
} else {
|
||||
fetch_dst = bld.tmp(RegClass::get(RegType::vgpr, fetch_bytes));
|
||||
}
|
||||
|
||||
if (use_mubuf) {
|
||||
bld.mubuf(opcode, Definition(fetch_dst), list, fetch_index,
|
||||
soffset, fetch_offset, false, false, true);
|
||||
} else {
|
||||
unsigned dfmt = fetch_fmt & 0xf;
|
||||
unsigned nfmt = fetch_fmt >> 4;
|
||||
bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
|
||||
soffset, dfmt, nfmt, fetch_offset, false, true);
|
||||
}
|
||||
|
||||
emit_split_vector(ctx, fetch_dst, fetch_dst.bytes() * 8 / bitsize);
|
||||
|
||||
if (fetch_component == 1) {
|
||||
channels[channel_start] = fetch_dst;
|
||||
} else {
|
||||
for (unsigned i = 0; i < MIN2(fetch_component, num_channels - channel_start); i++)
|
||||
channels[channel_start + i] = emit_extract_vector(
|
||||
ctx, fetch_dst, i, RegClass::get(RegType::vgpr, bitsize / 8u));
|
||||
}
|
||||
|
||||
channel_start += fetch_component;
|
||||
}
|
||||
|
||||
if (!direct_fetch) {
|
||||
bool is_float =
|
||||
nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float;
|
||||
|
||||
unsigned num_components = instr->dest.ssa.num_components;
|
||||
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
|
||||
unsigned num_temp = 0;
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
unsigned idx = i + component;
|
||||
if (idx < num_channels && channels[idx].id()) {
|
||||
Temp channel = channels[idx];
|
||||
vec->operands[i] = Operand(channel);
|
||||
|
||||
num_temp++;
|
||||
elems[i] = channel;
|
||||
} else if (bitsize == 64) {
|
||||
/* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
|
||||
* For 64-bit data types, no default attribute values are provided. Input variables
|
||||
* must not use more components than provided by the attribute.
|
||||
*/
|
||||
vec->operands[i] = Operand(v2);
|
||||
} else if (is_float && idx == 3) {
|
||||
vec->operands[i] = bitsize == 16 ? Operand::c16(0x3c00u) : Operand::c32(0x3f800000u);
|
||||
} else if (!is_float && idx == 3) {
|
||||
vec->operands[i] = Operand::get_const(ctx->options->gfx_level, 1u, bitsize / 8u);
|
||||
} else {
|
||||
vec->operands[i] = Operand::zero(bitsize / 8u);
|
||||
}
|
||||
}
|
||||
vec->definitions[0] = Definition(dst);
|
||||
ctx->block->instructions.emplace_back(std::move(vec));
|
||||
emit_split_vector(ctx, dst, num_components);
|
||||
|
||||
if (num_temp == num_components)
|
||||
ctx->allocated_vec.emplace(dst.id(), elems);
|
||||
}
|
||||
} else if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
|
||||
isel_err(offset.ssa->parent_instr,
|
||||
"Unimplemented non-zero nir_intrinsic_load_input offset");
|
||||
|
Loading…
Reference in New Issue
Block a user