mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-30 21:54:16 +08:00
nir/opt_varyings: implement compaction without flexible interpolation
We have to honor drivers when they say that different interpolation qualifiers can't be mixed in the same vec4, indicated by nir_io_has_flexible_input_interpolation_except_flat not being set. This is a prerequisite for enabling nir_opt_varyings for all drivers. Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32174>
This commit is contained in:
parent
a7c671efc6
commit
405e9d9b74
@ -389,6 +389,10 @@
|
||||
* * 32-bit transform feedback only
|
||||
* * 16-bit transform feedback only
|
||||
*
|
||||
* When the driver/hw can't mix different interpolation qualifiers
|
||||
* in the same vec4, the interpolated groups are further split into 6
|
||||
* groups, one for each qualifier.
|
||||
*
|
||||
* Then, all scalar varyings are relocated into new slots, starting from
|
||||
* VAR0.x and increasing the scalar slot offset in 32-bit or 16-bit
|
||||
* increments. Rules:
|
||||
@ -509,27 +513,79 @@
|
||||
enum fs_vec4_type {
|
||||
FS_VEC4_TYPE_NONE = 0,
|
||||
FS_VEC4_TYPE_FLAT,
|
||||
FS_VEC4_TYPE_INTERP_FP32,
|
||||
FS_VEC4_TYPE_INTERP_FP16,
|
||||
FS_VEC4_TYPE_INTERP_COLOR,
|
||||
FS_VEC4_TYPE_INTERP_EXPLICIT,
|
||||
FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT,
|
||||
FS_VEC4_TYPE_PER_PRIMITIVE,
|
||||
/* When nir_io_has_flexible_input_interpolation_except_flat is set: */
|
||||
FS_VEC4_TYPE_INTERP_FP32,
|
||||
FS_VEC4_TYPE_INTERP_FP16,
|
||||
FS_VEC4_TYPE_INTERP_COLOR, /* only for glShadeModel, i.e. INTERP_MODE_NONE */
|
||||
/* When nir_io_has_flexible_input_interpolation_except_flat is not set: */
|
||||
FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL,
|
||||
FS_VEC4_TYPE_INTERP_FP32_PERSP_CENTROID,
|
||||
FS_VEC4_TYPE_INTERP_FP32_PERSP_SAMPLE,
|
||||
FS_VEC4_TYPE_INTERP_FP32_LINEAR_PIXEL,
|
||||
FS_VEC4_TYPE_INTERP_FP32_LINEAR_CENTROID,
|
||||
FS_VEC4_TYPE_INTERP_FP32_LINEAR_SAMPLE,
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL,
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_CENTROID,
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_SAMPLE,
|
||||
FS_VEC4_TYPE_INTERP_FP16_LINEAR_PIXEL,
|
||||
FS_VEC4_TYPE_INTERP_FP16_LINEAR_CENTROID,
|
||||
FS_VEC4_TYPE_INTERP_FP16_LINEAR_SAMPLE,
|
||||
FS_VEC4_TYPE_INTERP_COLOR_PIXEL, /* only for glShadeModel, i.e. INTERP_MODE_NONE */
|
||||
FS_VEC4_TYPE_INTERP_COLOR_CENTROID, /* same */
|
||||
FS_VEC4_TYPE_INTERP_COLOR_SAMPLE, /* same */
|
||||
};
|
||||
|
||||
enum {
|
||||
PERSP_PIXEL,
|
||||
PERSP_CENTROID,
|
||||
PERSP_SAMPLE,
|
||||
LINEAR_PIXEL,
|
||||
LINEAR_CENTROID,
|
||||
LINEAR_SAMPLE,
|
||||
NUM_INTERP_QUALIFIERS,
|
||||
};
|
||||
|
||||
enum {
|
||||
COLOR_PIXEL,
|
||||
COLOR_CENTROID,
|
||||
COLOR_SAMPLE,
|
||||
NUM_COLOR_QUALIFIERS,
|
||||
};
|
||||
|
||||
#if PRINT_RELOCATE_SLOT
|
||||
static const char *fs_vec4_type_strings[] = {
|
||||
"NONE",
|
||||
"FLAT",
|
||||
"INTERP_FP32",
|
||||
"INTERP_FP16",
|
||||
"INTERP_COLOR",
|
||||
"INTERP_EXPLICIT",
|
||||
"INTERP_EXPLICIT_STRICT",
|
||||
"PER_PRIMITIVE",
|
||||
"INTERP_FP32",
|
||||
"INTERP_FP16",
|
||||
"INTERP_COLOR",
|
||||
"INTERP_FP32_PERSP_PIXEL",
|
||||
"INTERP_FP32_PERSP_CENTROID",
|
||||
"INTERP_FP32_PERSP_SAMPLE",
|
||||
"INTERP_FP32_LINEAR_PIXEL",
|
||||
"INTERP_FP32_LINEAR_CENTROID",
|
||||
"INTERP_FP32_LINEAR_SAMPLE",
|
||||
"INTERP_FP16_PERSP_PIXEL",
|
||||
"INTERP_FP16_PERSP_CENTROID",
|
||||
"INTERP_FP16_PERSP_SAMPLE",
|
||||
"INTERP_FP16_LINEAR_PIXEL",
|
||||
"INTERP_FP16_LINEAR_CENTROID",
|
||||
"INTERP_FP16_LINEAR_SAMPLE",
|
||||
"INTERP_COLOR_PIXEL",
|
||||
"INTERP_COLOR_CENTROID",
|
||||
"INTERP_COLOR_SAMPLE",
|
||||
};
|
||||
#endif // PRINT_RELOCATE_SLOT
|
||||
|
||||
typedef BITSET_WORD INTERP_QUAL_BITSET[NUM_INTERP_QUALIFIERS][BITSET_WORDS(NUM_SCALAR_SLOTS)];
|
||||
typedef BITSET_WORD COLOR_QUAL_BITSET[NUM_COLOR_QUALIFIERS][BITSET_WORDS(NUM_SCALAR_SLOTS)];
|
||||
|
||||
static unsigned
|
||||
get_scalar_16bit_slot(nir_io_semantics sem, unsigned component)
|
||||
{
|
||||
@ -598,6 +654,7 @@ struct linkage_info {
|
||||
bool can_move_uniforms;
|
||||
bool can_move_ubos;
|
||||
bool can_mix_convergent_flat_with_interpolated;
|
||||
bool has_flexible_interp;
|
||||
bool always_interpolate_convergent_fs_inputs;
|
||||
|
||||
gl_shader_stage producer_stage;
|
||||
@ -667,6 +724,13 @@ struct linkage_info {
|
||||
/* Color interpolation unqualified (follows the flat-shade state). */
|
||||
BITSET_DECLARE(color32_mask, NUM_SCALAR_SLOTS);
|
||||
|
||||
/* A separate bitmask for each qualifier when
|
||||
* nir_io_has_flexible_input_interpolation_except_flat is not set.
|
||||
*/
|
||||
INTERP_QUAL_BITSET interp_fp32_qual_masks;
|
||||
INTERP_QUAL_BITSET interp_fp16_qual_masks;
|
||||
COLOR_QUAL_BITSET color32_qual_masks;
|
||||
|
||||
/* Mask of output components that have only one store instruction, or if
|
||||
* they have multiple store instructions, all those instructions store
|
||||
* the same value. If the output has multiple vertices, all vertices store
|
||||
@ -701,6 +765,12 @@ struct linkage_info {
|
||||
#define BITSET_TEST32(m, b) \
|
||||
(BITSET_TEST(m, (b) & ~0x1) || BITSET_TEST(m, ((b) & ~0x1) + 1))
|
||||
|
||||
#define BITSET3_TEST_ANY(bitsets, b) (BITSET_TEST((bitsets)[0], (b)) || \
|
||||
BITSET_TEST((bitsets)[1], (b)) || \
|
||||
BITSET_TEST((bitsets)[2], (b)))
|
||||
#define BITSET6_TEST_ANY(bitsets, b) (BITSET3_TEST_ANY((bitsets), (b)) || \
|
||||
BITSET3_TEST_ANY(&(bitsets)[3], (b)))
|
||||
|
||||
static void
|
||||
print_linkage(struct linkage_info *linkage)
|
||||
{
|
||||
@ -725,6 +795,10 @@ print_linkage(struct linkage_info *linkage)
|
||||
!BITSET_TEST(linkage->no_varying16_mask, i) &&
|
||||
!BITSET_TEST(linkage->interp_fp32_mask, i) &&
|
||||
!BITSET_TEST(linkage->interp_fp16_mask, i) &&
|
||||
!BITSET6_TEST_ANY(linkage->interp_fp32_qual_masks, i) &&
|
||||
!BITSET6_TEST_ANY(linkage->interp_fp16_qual_masks, i) &&
|
||||
!BITSET_TEST(linkage->color32_mask, i) &&
|
||||
!BITSET3_TEST_ANY(linkage->color32_qual_masks, i) &&
|
||||
!BITSET_TEST(linkage->flat32_mask, i) &&
|
||||
!BITSET_TEST(linkage->flat16_mask, i) &&
|
||||
!BITSET_TEST(linkage->interp_explicit32_mask, i) &&
|
||||
@ -738,7 +812,7 @@ print_linkage(struct linkage_info *linkage)
|
||||
!BITSET_TEST(linkage->output_equal_mask, i))
|
||||
continue;
|
||||
|
||||
printf(" %7s.%c.%s: num_slots=%2u%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
printf(" %7s.%c.%s: num_slots=%2u%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
gl_varying_slot_name_for_stage(vec4_slot(i),
|
||||
linkage->producer_stage) + 13,
|
||||
"xyzw"[(i / 2) % 4],
|
||||
@ -753,7 +827,23 @@ print_linkage(struct linkage_info *linkage)
|
||||
BITSET_TEST(linkage->no_varying32_mask, i) ? " no_varying32" : "",
|
||||
BITSET_TEST(linkage->no_varying16_mask, i) ? " no_varying16" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_mask, i) ? " interp_fp32" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[0], i) ? " interp_fp32_persp_pixel" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[1], i) ? " interp_fp32_persp_centroid" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[2], i) ? " interp_fp32_persp_sample" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[3], i) ? " interp_fp32_linear_pixel" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[4], i) ? " interp_fp32_linear_centroid" : "",
|
||||
BITSET_TEST(linkage->interp_fp32_qual_masks[5], i) ? " interp_fp32_linear_sample" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_mask, i) ? " interp_fp16" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[0], i) ? " interp_fp16_persp_pixel" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[1], i) ? " interp_fp16_persp_centroid" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[2], i) ? " interp_fp16_persp_sample" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[3], i) ? " interp_fp16_linear_pixel" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[4], i) ? " interp_fp16_linear_centroid" : "",
|
||||
BITSET_TEST(linkage->interp_fp16_qual_masks[5], i) ? " interp_fp16_linear_sample" : "",
|
||||
BITSET_TEST(linkage->color32_mask, i) ? " color32" : "",
|
||||
BITSET_TEST(linkage->color32_qual_masks[0], i) ? " color32_pixel" : "",
|
||||
BITSET_TEST(linkage->color32_qual_masks[1], i) ? " color32_centroid" : "",
|
||||
BITSET_TEST(linkage->color32_qual_masks[2], i) ? " color32_sample" : "",
|
||||
BITSET_TEST(linkage->flat32_mask, i) ? " flat32" : "",
|
||||
BITSET_TEST(linkage->flat16_mask, i) ? " flat16" : "",
|
||||
BITSET_TEST(linkage->interp_explicit32_mask, i) ? " interp_explicit32" : "",
|
||||
@ -780,6 +870,10 @@ slot_disable_optimizations_and_compaction(struct linkage_info *linkage,
|
||||
BITSET_CLEAR(linkage->convergent16_mask, i);
|
||||
BITSET_CLEAR(linkage->interp_fp32_mask, i);
|
||||
BITSET_CLEAR(linkage->interp_fp16_mask, i);
|
||||
for (unsigned b = 0; b < NUM_INTERP_QUALIFIERS; b++) {
|
||||
BITSET_CLEAR(linkage->interp_fp32_qual_masks[b], i);
|
||||
BITSET_CLEAR(linkage->interp_fp16_qual_masks[b], i);
|
||||
}
|
||||
BITSET_CLEAR(linkage->flat32_mask, i);
|
||||
BITSET_CLEAR(linkage->flat16_mask, i);
|
||||
BITSET_CLEAR(linkage->interp_explicit32_mask, i);
|
||||
@ -793,6 +887,8 @@ slot_disable_optimizations_and_compaction(struct linkage_info *linkage,
|
||||
BITSET_CLEAR(linkage->no_varying32_mask, i);
|
||||
BITSET_CLEAR(linkage->no_varying16_mask, i);
|
||||
BITSET_CLEAR(linkage->color32_mask, i);
|
||||
for (unsigned b = 0; b < NUM_COLOR_QUALIFIERS; b++)
|
||||
BITSET_CLEAR(linkage->color32_qual_masks[b], i);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -873,6 +969,45 @@ color_uses_shade_model(struct linkage_info *linkage, unsigned i)
|
||||
return false;
|
||||
}
|
||||
|
||||
static enum fs_vec4_type
|
||||
get_interp_vec4_type(struct linkage_info *linkage, unsigned slot,
|
||||
nir_intrinsic_instr *load)
|
||||
{
|
||||
assert(!linkage->has_flexible_interp);
|
||||
assert(load->intrinsic == nir_intrinsic_load_interpolated_input);
|
||||
|
||||
nir_intrinsic_instr *baryc =
|
||||
nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);
|
||||
enum fs_vec4_type base;
|
||||
|
||||
if (color_uses_shade_model(linkage, slot))
|
||||
base = FS_VEC4_TYPE_INTERP_COLOR_PIXEL;
|
||||
else if (load->def.bit_size == 32)
|
||||
base = FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL;
|
||||
else if (load->def.bit_size == 16)
|
||||
base = FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL;
|
||||
else
|
||||
unreachable("invalid load_interpolated_input type");
|
||||
|
||||
bool linear = nir_intrinsic_interp_mode(baryc) == INTERP_MODE_NOPERSPECTIVE;
|
||||
|
||||
if (linear)
|
||||
base += 3;
|
||||
|
||||
switch (baryc->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
return base;
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
return base + 1;
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
return base + 2;
|
||||
default:
|
||||
unreachable("unexpected barycentric intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
preserve_infs_nans(nir_shader *nir, unsigned bit_size)
|
||||
{
|
||||
@ -1180,14 +1315,18 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_EXPLICIT;
|
||||
break;
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
if (color_uses_shade_model(linkage, slot))
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_COLOR;
|
||||
else if (intr->def.bit_size == 32)
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_FP32;
|
||||
else if (intr->def.bit_size == 16)
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_FP16;
|
||||
else
|
||||
unreachable("invalid load_interpolated_input type");
|
||||
if (linkage->has_flexible_interp) {
|
||||
if (color_uses_shade_model(linkage, slot))
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_COLOR;
|
||||
else if (intr->def.bit_size == 32)
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_FP32;
|
||||
else if (intr->def.bit_size == 16)
|
||||
fs_vec4_type = FS_VEC4_TYPE_INTERP_FP16;
|
||||
else
|
||||
unreachable("invalid load_interpolated_input type");
|
||||
} else {
|
||||
fs_vec4_type = get_interp_vec4_type(linkage, slot, intr);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected input load intrinsic");
|
||||
@ -1215,52 +1354,107 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
|
||||
|
||||
/* Record inputs that can be compacted. */
|
||||
if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
unsigned i;
|
||||
assert(intr->def.bit_size == 32 || intr->def.bit_size == 16);
|
||||
|
||||
switch (fs_vec4_type) {
|
||||
case FS_VEC4_TYPE_FLAT:
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->flat32_mask, slot);
|
||||
else if (intr->def.bit_size == 16)
|
||||
BITSET_SET(linkage->flat16_mask, slot);
|
||||
else
|
||||
unreachable("invalid load_input type");
|
||||
BITSET_SET(linkage->flat16_mask, slot);
|
||||
break;
|
||||
case nir_intrinsic_load_per_primitive_input:
|
||||
case FS_VEC4_TYPE_INTERP_EXPLICIT:
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->interp_explicit32_mask, slot);
|
||||
else
|
||||
BITSET_SET(linkage->interp_explicit16_mask, slot);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT:
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->interp_explicit_strict32_mask, slot);
|
||||
else
|
||||
BITSET_SET(linkage->interp_explicit_strict16_mask, slot);
|
||||
break;
|
||||
case FS_VEC4_TYPE_PER_PRIMITIVE:
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->per_primitive32_mask, slot);
|
||||
else if (intr->def.bit_size == 16)
|
||||
else
|
||||
BITSET_SET(linkage->per_primitive16_mask, slot);
|
||||
else
|
||||
unreachable("invalid load_input type");
|
||||
break;
|
||||
case nir_intrinsic_load_input_vertex:
|
||||
if (sem.interp_explicit_strict) {
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->interp_explicit_strict32_mask, slot);
|
||||
else if (intr->def.bit_size == 16)
|
||||
BITSET_SET(linkage->interp_explicit_strict16_mask, slot);
|
||||
else
|
||||
unreachable("invalid load_input_vertex type");
|
||||
|
||||
case FS_VEC4_TYPE_INTERP_FP32:
|
||||
BITSET_SET(linkage->interp_fp32_mask, slot);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP16:
|
||||
BITSET_SET(linkage->interp_fp16_mask, slot);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_COLOR:
|
||||
BITSET_SET(linkage->color32_mask, slot);
|
||||
break;
|
||||
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_SAMPLE:
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_SAMPLE:
|
||||
i = fs_vec4_type - FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL;
|
||||
BITSET_SET(linkage->interp_fp32_qual_masks[i], slot);
|
||||
break;
|
||||
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_SAMPLE:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_SAMPLE:
|
||||
i = fs_vec4_type - FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL;
|
||||
BITSET_SET(linkage->interp_fp16_qual_masks[i], slot);
|
||||
break;
|
||||
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_SAMPLE:
|
||||
i = fs_vec4_type - FS_VEC4_TYPE_INTERP_COLOR_PIXEL;
|
||||
BITSET_SET(linkage->color32_qual_masks[i], slot);
|
||||
break;
|
||||
|
||||
case FS_VEC4_TYPE_NONE:
|
||||
unreachable("unexpected fs_vec4_type");
|
||||
}
|
||||
|
||||
if (!linkage->has_flexible_interp &&
|
||||
intr->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
/* interpolateAtCentroid can occur simultaneously with any other
|
||||
* qualifier. If centroid is flagged with any other qualifier,
|
||||
* unflag centroid. Even though we track such outputs as the other
|
||||
* qualifier, the load_barycentric_centroid intrinsic must be
|
||||
* preserved by all optimizations. The only case when it's not
|
||||
* preserved is when the input is convergent, in which case
|
||||
* all qualifiers have the same behavior and we opportunistically
|
||||
* change it during compaction.
|
||||
*/
|
||||
if (color_uses_shade_model(linkage, slot)) {
|
||||
if (BITSET_TEST(linkage->color32_qual_masks[COLOR_CENTROID], slot) &&
|
||||
(BITSET_TEST(linkage->color32_qual_masks[COLOR_PIXEL], slot) ||
|
||||
BITSET_TEST(linkage->color32_qual_masks[COLOR_SAMPLE], slot)))
|
||||
BITSET_CLEAR(linkage->color32_qual_masks[COLOR_CENTROID], slot);
|
||||
} else {
|
||||
if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->interp_explicit32_mask, slot);
|
||||
else if (intr->def.bit_size == 16)
|
||||
BITSET_SET(linkage->interp_explicit16_mask, slot);
|
||||
else
|
||||
unreachable("invalid load_input_vertex type");
|
||||
INTERP_QUAL_BITSET *bitsets =
|
||||
intr->def.bit_size == 32 ? &linkage->interp_fp32_qual_masks :
|
||||
&linkage->interp_fp16_qual_masks;
|
||||
|
||||
if (BITSET_TEST((*bitsets)[PERSP_CENTROID], slot) &&
|
||||
(BITSET_TEST((*bitsets)[PERSP_PIXEL], slot) ||
|
||||
BITSET_TEST((*bitsets)[PERSP_SAMPLE], slot)))
|
||||
BITSET_CLEAR((*bitsets)[PERSP_CENTROID], slot);
|
||||
|
||||
if (BITSET_TEST((*bitsets)[LINEAR_CENTROID], slot) &&
|
||||
(BITSET_TEST((*bitsets)[LINEAR_PIXEL], slot) ||
|
||||
BITSET_TEST((*bitsets)[LINEAR_SAMPLE], slot)))
|
||||
BITSET_CLEAR((*bitsets)[LINEAR_CENTROID], slot);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
if (color_uses_shade_model(linkage, slot))
|
||||
BITSET_SET(linkage->color32_mask, slot);
|
||||
else if (intr->def.bit_size == 32)
|
||||
BITSET_SET(linkage->interp_fp32_mask, slot);
|
||||
else if (intr->def.bit_size == 16)
|
||||
BITSET_SET(linkage->interp_fp16_mask, slot);
|
||||
else
|
||||
unreachable("invalid load_interpolated_input type");
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected input load intrinsic");
|
||||
}
|
||||
} else {
|
||||
if (intr->def.bit_size == 32)
|
||||
@ -1558,8 +1752,10 @@ tidy_up_convergent_varyings(struct linkage_info *linkage)
|
||||
*/
|
||||
BITSET_FOREACH_SET(i, linkage->convergent32_mask, NUM_SCALAR_SLOTS) {
|
||||
if (!BITSET_TEST(linkage->interp_fp32_mask, i) &&
|
||||
!BITSET_TEST(linkage->color32_mask, i) &&
|
||||
!BITSET_TEST(linkage->flat32_mask, i) &&
|
||||
!BITSET_TEST(linkage->color32_mask, i)) {
|
||||
!BITSET6_TEST_ANY(linkage->interp_fp32_qual_masks, i) &&
|
||||
!BITSET3_TEST_ANY(linkage->color32_qual_masks, i)) {
|
||||
/* Clear the flag - not used by FS. */
|
||||
BITSET_CLEAR(linkage->convergent32_mask, i);
|
||||
} else if ((!linkage->can_mix_convergent_flat_with_interpolated &&
|
||||
@ -1571,13 +1767,19 @@ tidy_up_convergent_varyings(struct linkage_info *linkage)
|
||||
} else {
|
||||
/* Keep it convergent. */
|
||||
BITSET_CLEAR(linkage->interp_fp32_mask, i);
|
||||
for (unsigned b = 0; b < NUM_INTERP_QUALIFIERS; b++)
|
||||
BITSET_CLEAR(linkage->interp_fp32_qual_masks[b], i);
|
||||
BITSET_CLEAR(linkage->color32_mask, i);
|
||||
for (unsigned b = 0; b < NUM_COLOR_QUALIFIERS; b++)
|
||||
BITSET_CLEAR(linkage->color32_qual_masks[b], i);
|
||||
BITSET_CLEAR(linkage->flat32_mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
BITSET_FOREACH_SET(i, linkage->convergent16_mask, NUM_SCALAR_SLOTS) {
|
||||
if (!BITSET_TEST(linkage->interp_fp16_mask, i) &&
|
||||
!BITSET_TEST(linkage->flat16_mask, i)) {
|
||||
!BITSET_TEST(linkage->flat16_mask, i) &&
|
||||
!BITSET6_TEST_ANY(linkage->interp_fp16_qual_masks, i)) {
|
||||
/* Clear the flag - not used by FS. */
|
||||
BITSET_CLEAR(linkage->convergent16_mask, i);
|
||||
} else if ((!linkage->can_mix_convergent_flat_with_interpolated &&
|
||||
@ -1589,6 +1791,8 @@ tidy_up_convergent_varyings(struct linkage_info *linkage)
|
||||
} else {
|
||||
/* Keep it convergent. */
|
||||
BITSET_CLEAR(linkage->interp_fp16_mask, i);
|
||||
for (unsigned b = 0; b < NUM_INTERP_QUALIFIERS; b++)
|
||||
BITSET_CLEAR(linkage->interp_fp16_qual_masks[b], i);
|
||||
BITSET_CLEAR(linkage->flat16_mask, i);
|
||||
}
|
||||
}
|
||||
@ -2293,7 +2497,7 @@ enum var_qualifier {
|
||||
QUAL_VAR_INTERP_ANY,
|
||||
QUAL_COLOR_INTERP_ANY,
|
||||
QUAL_COLOR_SHADEMODEL_ANY,
|
||||
/* When nir_io_has_flexible_input_interpolation_except_flat is unset: */
|
||||
/* When nir_io_has_flexible_input_interpolation_except_flat is not set: */
|
||||
QUAL_VAR_PERSP_PIXEL,
|
||||
QUAL_VAR_PERSP_CENTROID,
|
||||
QUAL_VAR_PERSP_SAMPLE,
|
||||
@ -2342,8 +2546,7 @@ get_input_qualifier(struct linkage_info *linkage, unsigned i)
|
||||
nir_intrinsic_instr *baryc =
|
||||
nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);
|
||||
|
||||
if (linkage->consumer_builder.shader->options->io_options &
|
||||
nir_io_has_flexible_input_interpolation_except_flat) {
|
||||
if (linkage->has_flexible_interp) {
|
||||
if (is_color) {
|
||||
return nir_intrinsic_interp_mode(baryc) == INTERP_MODE_NONE ?
|
||||
QUAL_COLOR_SHADEMODEL_ANY : QUAL_COLOR_INTERP_ANY;
|
||||
@ -2352,6 +2555,25 @@ get_input_qualifier(struct linkage_info *linkage, unsigned i)
|
||||
}
|
||||
}
|
||||
|
||||
/* If interpolateAt{Centroid,Offset,Sample} is used, see if there is
|
||||
* another load that doesn't use those, so that we get the real qualifier.
|
||||
*/
|
||||
if (baryc->intrinsic == nir_intrinsic_load_barycentric_centroid ||
|
||||
baryc->intrinsic == nir_intrinsic_load_barycentric_at_offset ||
|
||||
baryc->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
|
||||
list_for_each_entry(struct list_node, iter, &slot->consumer.loads, head) {
|
||||
nir_intrinsic_instr *bar =
|
||||
nir_instr_as_intrinsic(iter->instr->src[0].ssa->parent_instr);
|
||||
|
||||
if (bar->intrinsic != nir_intrinsic_load_barycentric_centroid &&
|
||||
bar->intrinsic != nir_intrinsic_load_barycentric_at_offset &&
|
||||
bar->intrinsic != nir_intrinsic_load_barycentric_at_sample) {
|
||||
baryc = bar;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the exact interpolation qualifier. */
|
||||
unsigned pixel_location;
|
||||
enum var_qualifier qual;
|
||||
@ -3194,9 +3416,14 @@ try_move_postdominator(struct linkage_info *linkage,
|
||||
if (alu_interp == FLAG_INTERP_CONVERGENT) {
|
||||
mask = new_bit_size == 16 ? linkage->convergent16_mask
|
||||
: linkage->convergent32_mask;
|
||||
} else {
|
||||
} else if (linkage->has_flexible_interp) {
|
||||
mask = new_bit_size == 16 ? linkage->interp_fp16_mask
|
||||
: linkage->interp_fp32_mask;
|
||||
} else {
|
||||
/* The index of the qualifier is encoded in alu_interp, so extract it. */
|
||||
unsigned i = (alu_interp - FLAG_INTERP_PERSP_PIXEL) >> 5;
|
||||
mask = new_bit_size == 16 ? linkage->interp_fp16_qual_masks[i]
|
||||
: linkage->interp_fp32_qual_masks[i];
|
||||
}
|
||||
} else if (linkage->consumer_stage == MESA_SHADER_TESS_EVAL &&
|
||||
alu_interp > FLAG_INTERP_FLAT) {
|
||||
@ -3275,7 +3502,7 @@ try_move_postdominator(struct linkage_info *linkage,
|
||||
assert(!BITSET_TEST(linkage->no_varying16_mask, slot_index));
|
||||
|
||||
/* Re-set the category of the new scalar input. This will cause
|
||||
* the compaction to treat it as a different type, so that it will move it
|
||||
* the compaction to treat it as a different type, so that it will be moved
|
||||
* into the vec4 that has compatible interpolation qualifiers.
|
||||
*
|
||||
* This shouldn't be done if any of the interp masks are not set, which
|
||||
@ -3283,12 +3510,18 @@ try_move_postdominator(struct linkage_info *linkage,
|
||||
*/
|
||||
if (BITSET_TEST(linkage->interp_fp32_mask, slot_index) ||
|
||||
BITSET_TEST(linkage->interp_fp16_mask, slot_index) ||
|
||||
BITSET6_TEST_ANY(linkage->interp_fp32_qual_masks, slot_index) ||
|
||||
BITSET6_TEST_ANY(linkage->interp_fp16_qual_masks, slot_index) ||
|
||||
BITSET_TEST(linkage->flat32_mask, slot_index) ||
|
||||
BITSET_TEST(linkage->flat16_mask, slot_index) ||
|
||||
BITSET_TEST(linkage->convergent32_mask, slot_index) ||
|
||||
BITSET_TEST(linkage->convergent16_mask, slot_index)) {
|
||||
BITSET_CLEAR(linkage->interp_fp32_mask, slot_index);
|
||||
for (unsigned i = 0; i < NUM_INTERP_QUALIFIERS; i++)
|
||||
BITSET_CLEAR(linkage->interp_fp32_qual_masks[i], slot_index);
|
||||
BITSET_CLEAR(linkage->interp_fp16_mask, slot_index);
|
||||
for (unsigned i = 0; i < NUM_INTERP_QUALIFIERS; i++)
|
||||
BITSET_CLEAR(linkage->interp_fp16_qual_masks[i], slot_index);
|
||||
BITSET_CLEAR(linkage->flat16_mask, slot_index);
|
||||
BITSET_CLEAR(linkage->flat32_mask, slot_index);
|
||||
BITSET_CLEAR(linkage->convergent16_mask, slot_index);
|
||||
@ -3779,14 +4012,16 @@ relocate_slot(struct linkage_info *linkage, struct scalar_slot *slot,
|
||||
intr->intrinsic != nir_intrinsic_load_per_primitive_input);
|
||||
}
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
|
||||
continue;
|
||||
|
||||
/* This path is used when promoting convergent interpolated
|
||||
* inputs to flat. Replace load_interpolated_input with load_input.
|
||||
*/
|
||||
if (intr->intrinsic == nir_intrinsic_load_interpolated_input &&
|
||||
(fs_vec4_type == FS_VEC4_TYPE_FLAT ||
|
||||
/* Promote all convergent loads to flat if the driver supports it. */
|
||||
(convergent &&
|
||||
linkage->can_mix_convergent_flat_with_interpolated))) {
|
||||
if (fs_vec4_type == FS_VEC4_TYPE_FLAT ||
|
||||
/* Promote all convergent loads to flat if the driver supports it. */
|
||||
(convergent &&
|
||||
linkage->can_mix_convergent_flat_with_interpolated)) {
|
||||
assert(instruction_lists[i] == &slot->consumer.loads);
|
||||
nir_builder *b = &linkage->consumer_builder;
|
||||
|
||||
@ -3820,6 +4055,76 @@ relocate_slot(struct linkage_info *linkage, struct scalar_slot *slot,
|
||||
nir_src_rewrite(&store->src[0], repl);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We are packing convergent inputs with any other interpolated
|
||||
* inputs in the same vec4, but the interpolation qualifier might not
|
||||
* be the same between the two. Set the qualifier of the convergent
|
||||
* input to match the input it's being packed with.
|
||||
*/
|
||||
if (!linkage->has_flexible_interp && convergent) {
|
||||
enum fs_vec4_type current_vec4_type =
|
||||
get_interp_vec4_type(linkage, i, intr);
|
||||
|
||||
/* Make the interpolation qualifier match the slot where we are
|
||||
* moving this input.
|
||||
*/
|
||||
if (current_vec4_type != fs_vec4_type) {
|
||||
nir_builder *b = &linkage->consumer_builder;
|
||||
nir_def *baryc;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
switch (fs_vec4_type) {
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL:
|
||||
baryc = nir_load_barycentric_pixel(b, 32,
|
||||
.interp_mode = INTERP_MODE_SMOOTH);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_CENTROID:
|
||||
baryc = nir_load_barycentric_centroid(b, 32,
|
||||
.interp_mode = INTERP_MODE_SMOOTH);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP32_PERSP_SAMPLE:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_PERSP_SAMPLE:
|
||||
baryc = nir_load_barycentric_sample(b, 32,
|
||||
.interp_mode = INTERP_MODE_SMOOTH);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_PIXEL:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_PIXEL:
|
||||
baryc = nir_load_barycentric_pixel(b, 32,
|
||||
.interp_mode = INTERP_MODE_NOPERSPECTIVE);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_CENTROID:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_CENTROID:
|
||||
baryc = nir_load_barycentric_centroid(b, 32,
|
||||
.interp_mode = INTERP_MODE_NOPERSPECTIVE);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_FP32_LINEAR_SAMPLE:
|
||||
case FS_VEC4_TYPE_INTERP_FP16_LINEAR_SAMPLE:
|
||||
baryc = nir_load_barycentric_sample(b, 32,
|
||||
.interp_mode = INTERP_MODE_NOPERSPECTIVE);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_PIXEL:
|
||||
baryc = nir_load_barycentric_pixel(b, 32,
|
||||
.interp_mode = INTERP_MODE_NONE);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_CENTROID:
|
||||
baryc = nir_load_barycentric_centroid(b, 32,
|
||||
.interp_mode = INTERP_MODE_NONE);
|
||||
break;
|
||||
case FS_VEC4_TYPE_INTERP_COLOR_SAMPLE:
|
||||
baryc = nir_load_barycentric_sample(b, 32,
|
||||
.interp_mode = INTERP_MODE_NONE);
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid qualifier");
|
||||
}
|
||||
|
||||
nir_src_rewrite(&intr->src[0], baryc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3965,7 +4270,7 @@ fs_assign_slots(struct linkage_info *linkage,
|
||||
* \param flat_mask The list of flat slots to assign locations for.
|
||||
* \param convergent_mask The list of slots that have convergent output
|
||||
* stores.
|
||||
* \param sized_interp_type One of FS_VEC4_TYPE_INTERP_{FP32, FP16, COLOR}.
|
||||
* \param sized_interp_type One of FS_VEC4_TYPE_INTERP_{FP32, FP16, COLOR}*.
|
||||
* \param slot_size 1 for 16 bits, 2 for 32 bits
|
||||
* \param color_channel_rotate Assign color channels starting with this index,
|
||||
* e.g. 2 assigns channels in the zwxy order.
|
||||
@ -4051,6 +4356,106 @@ fs_assign_slot_groups(struct linkage_info *linkage,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as fs_assign_slot_groups, but don't mix different interpolation
|
||||
* qualifiers in the same vec4.
|
||||
*/
|
||||
static void
|
||||
fs_assign_slot_groups_separate_qual(struct linkage_info *linkage,
|
||||
BITSET_WORD *assigned_mask,
|
||||
uint8_t assigned_fs_vec4_type[NUM_TOTAL_VARYING_SLOTS],
|
||||
INTERP_QUAL_BITSET *interp_masks,
|
||||
BITSET_WORD *flat_mask,
|
||||
BITSET_WORD *convergent_mask,
|
||||
COLOR_QUAL_BITSET *color_interp_masks,
|
||||
enum fs_vec4_type sized_interp_type_base,
|
||||
unsigned slot_size,
|
||||
bool assign_colors,
|
||||
unsigned color_channel_rotate,
|
||||
nir_opt_varyings_progress *progress)
|
||||
{
|
||||
unsigned unused_interp_slots[NUM_INTERP_QUALIFIERS] = {0};
|
||||
unsigned unused_color_slots[NUM_COLOR_QUALIFIERS] = {0};
|
||||
|
||||
/* Put interpolated slots first. */
|
||||
for (unsigned i = 0; i < NUM_INTERP_QUALIFIERS; i++) {
|
||||
unused_interp_slots[i] =
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
(*interp_masks)[i], sized_interp_type_base + i,
|
||||
slot_size, NUM_SCALAR_SLOTS, false, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
|
||||
if (color_interp_masks) {
|
||||
for (unsigned i = 0; i < NUM_COLOR_QUALIFIERS; i++) {
|
||||
unused_color_slots[i] =
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
(*color_interp_masks)[i],
|
||||
FS_VEC4_TYPE_INTERP_COLOR_PIXEL + i,
|
||||
slot_size, NUM_SCALAR_SLOTS, false, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
}
|
||||
|
||||
/* Put flat slots next.
|
||||
* Note that only flat vec4 slots can have both 32-bit and 16-bit types
|
||||
* packed in the same vec4. 32-bit flat inputs are packed first, followed
|
||||
* by 16-bit flat inputs.
|
||||
*/
|
||||
unsigned unused_flat_slots =
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
flat_mask, FS_VEC4_TYPE_FLAT,
|
||||
slot_size, NUM_SCALAR_SLOTS, false, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
|
||||
/* Take the inputs with convergent values and assign them as follows.
|
||||
* Since they can be assigned as both interpolated and flat, we can
|
||||
* choose. We prefer them to be flat, but if interpolated vec4s have
|
||||
* unused components, try to fill those before starting a new flat vec4.
|
||||
*
|
||||
* First, fill the unused components of flat (if any) with convergent
|
||||
* inputs.
|
||||
*/
|
||||
if (!linkage->always_interpolate_convergent_fs_inputs &&
|
||||
unused_flat_slots) {
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
convergent_mask, FS_VEC4_TYPE_FLAT,
|
||||
slot_size, unused_flat_slots, true, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
|
||||
/* Then fill the unused components of interpolated slots (if any) with
|
||||
* convergent inputs.
|
||||
*/
|
||||
for (unsigned i = 0; i < NUM_INTERP_QUALIFIERS; i++) {
|
||||
if (unused_interp_slots[i]) {
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
convergent_mask, sized_interp_type_base + i,
|
||||
slot_size, unused_interp_slots[i], true,
|
||||
assign_colors, color_channel_rotate, progress);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < NUM_COLOR_QUALIFIERS; i++) {
|
||||
if (unused_color_slots[i]) {
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
convergent_mask, FS_VEC4_TYPE_INTERP_COLOR_PIXEL + i,
|
||||
slot_size, unused_color_slots[i], true, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
}
|
||||
|
||||
/* Then make the remaining convergent inputs flat. */
|
||||
fs_assign_slots(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
convergent_mask,
|
||||
linkage->always_interpolate_convergent_fs_inputs ?
|
||||
(slot_size == 2 ? FS_VEC4_TYPE_INTERP_FP32_LINEAR_PIXEL :
|
||||
FS_VEC4_TYPE_INTERP_FP16_LINEAR_PIXEL) :
|
||||
FS_VEC4_TYPE_FLAT,
|
||||
slot_size, NUM_SCALAR_SLOTS, true, assign_colors,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
|
||||
static void
|
||||
vs_tcs_tes_gs_assign_slots(struct linkage_info *linkage,
|
||||
BITSET_WORD *input_mask,
|
||||
@ -4120,16 +4525,34 @@ compact_varyings(struct linkage_info *linkage,
|
||||
BITSET_DECLARE(assigned_mask, NUM_SCALAR_SLOTS);
|
||||
BITSET_ZERO(assigned_mask);
|
||||
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp32_mask, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP32, 2, false, 0, progress);
|
||||
if (linkage->has_flexible_interp) {
|
||||
/* This codepath packs convergent varyings with both interpolated and
|
||||
* flat, whichever has free space.
|
||||
*/
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp32_mask, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP32, 2, false, 0, progress);
|
||||
|
||||
/* Now do the same thing, but for 16-bit inputs. */
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp16_mask, linkage->flat16_mask,
|
||||
linkage->convergent16_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
|
||||
/* Now do the same thing, but for 16-bit inputs. */
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp16_mask, linkage->flat16_mask,
|
||||
linkage->convergent16_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
|
||||
} else {
|
||||
/* Basically the same as above. */
|
||||
fs_assign_slot_groups_separate_qual(
|
||||
linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
&linkage->interp_fp32_qual_masks, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL, 2, false, 0, progress);
|
||||
|
||||
fs_assign_slot_groups_separate_qual(
|
||||
linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
&linkage->interp_fp16_qual_masks, linkage->flat16_mask,
|
||||
linkage->convergent16_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress);
|
||||
}
|
||||
|
||||
/* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
|
||||
* slot because the vertex data is passed to FS as-is.
|
||||
@ -4184,6 +4607,17 @@ compact_varyings(struct linkage_info *linkage,
|
||||
!BITSET_TEST_RANGE_INSIDE_WORD(linkage->xfb32_only_mask, col0,
|
||||
col0 + 15, 0);
|
||||
|
||||
for (unsigned i = 0; i < NUM_INTERP_QUALIFIERS; i++) {
|
||||
has_colors |=
|
||||
!BITSET_TEST_RANGE_INSIDE_WORD(linkage->interp_fp32_qual_masks[i],
|
||||
col0, col0 + 15, 0);
|
||||
}
|
||||
for (unsigned i = 0; i < NUM_COLOR_QUALIFIERS; i++) {
|
||||
has_colors |=
|
||||
!BITSET_TEST_RANGE_INSIDE_WORD(linkage->color32_qual_masks[i],
|
||||
col0, col0 + 15, 0);
|
||||
}
|
||||
|
||||
if (has_colors) {
|
||||
unsigned color_channel_rotate = 0;
|
||||
|
||||
@ -4193,11 +4627,20 @@ compact_varyings(struct linkage_info *linkage,
|
||||
DIV_ROUND_UP(BITSET_LAST_BIT(assigned_mask), 2) % 4;
|
||||
}
|
||||
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp32_mask, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, linkage->color32_mask,
|
||||
FS_VEC4_TYPE_INTERP_FP32, 2, true,
|
||||
color_channel_rotate, progress);
|
||||
if (linkage->has_flexible_interp) {
|
||||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp32_mask, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, linkage->color32_mask,
|
||||
FS_VEC4_TYPE_INTERP_FP32, 2, true,
|
||||
color_channel_rotate, progress);
|
||||
} else {
|
||||
fs_assign_slot_groups_separate_qual(
|
||||
linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
&linkage->interp_fp32_qual_masks, linkage->flat32_mask,
|
||||
linkage->convergent32_mask, &linkage->color32_qual_masks,
|
||||
FS_VEC4_TYPE_INTERP_FP32_PERSP_PIXEL, 2, true,
|
||||
color_channel_rotate, progress);
|
||||
}
|
||||
|
||||
/* Put transform-feedback-only outputs last. */
|
||||
fs_assign_slots(linkage, assigned_mask, NULL,
|
||||
@ -4274,6 +4717,10 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv,
|
||||
consumer->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
consumer->options->io_options &
|
||||
nir_io_mix_convergent_flat_with_interpolated,
|
||||
.has_flexible_interp =
|
||||
consumer->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
consumer->options->io_options &
|
||||
nir_io_has_flexible_input_interpolation_except_flat,
|
||||
.always_interpolate_convergent_fs_inputs =
|
||||
consumer->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
consumer->options->io_options &
|
||||
|
Loading…
Reference in New Issue
Block a user