def detect_conflict(regdb, field_in_type1, field_in_type2):
    """
    Tell whether field_in_type1 and field_in_type2 clash when merged.

    Each field is tested against the fields of every register type
    referenced by the other one. A clash is reported as soon as one of
    those fields starts strictly after the first bit of the tested
    field and no later than its last bit.

    Returns True on a clash, False when no such field is found in
    either direction.
    """
    # Both type_refs collections are fetched before any comparison.
    refs_of_first = field_in_type1.type_refs
    refs_of_second = field_in_type2.type_refs

    # (register types to walk, field whose bit range is being tested)
    pairings = (
        (refs_of_first, field_in_type2),
        (refs_of_second, field_in_type1),
    )

    for type_refs, tested in pairings:
        for type_ref in type_refs:
            for candidate in regdb.register_type(type_ref).fields:
                start = candidate.bits[0]
                # A field beginning at the tested field's own first bit
                # is not counted; only later starts within its range are.
                if tested.bits[0] < start <= tested.bits[1]:
                    return True

    return False
- if (field.bits[0] > line.bits[0] and - field.bits[0] <= line.bits[1]): - conflict = True - break - if conflict: - break - if conflict: + if detect_conflict(regdb, prev, line): continue prev.bits[1] = max(prev.bits[1], line.bits[1]) diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 7c67c6a5875..de773755c86 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -495,13 +495,13 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.chip_class >= GFX9) { radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | - S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT(24) | + S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); } else if (physical_device->rad_info.chip_class >= GFX8) { uint32_t vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16); + S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); if (physical_device->rad_info.family == CHIP_FIJI || physical_device->rad_info.family >= CHIP_POLARIS10) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 91d9f4a5b72..18d8bca3c83 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5335,7 +5335,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) unsigned vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16); + S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); /* Testing with Unigine Heaven extreme tesselation yielded best results * with TRAP_SPLIT = 3. 
@@ -5362,7 +5362,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | - S_028B50_DONUT_SPLIT(24) | S_028B50_TRAP_SPLIT(6)); + S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | S_028C48_MAX_PRIM_PER_BATCH(1023));