mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-25 03:04:29 +08:00
nir,radeonsi: move ffma fusing to late optimizations for better codegen
The freedreno trace changes were suggested by Rob Clark.

ALU performance is higher, because ffma is used more often, but so is register usage, because trinary opcodes (such as ffma) usually need at least 3 live registers.

54793 shaders in 33659 tests
Totals:
SGPRS: 2639746 -> 2642938 (0.12 %)
VGPRS: 1534120 -> 1536392 (0.15 %)
Spilled SGPRs: 3541 -> 3618 (2.17 %)
Spilled VGPRs: 33 -> 44 (33.33 %)
Scratch size: 292 -> 312 (6.85 %) dwords per thread
Code Size: 55639836 -> 55620116 (-0.04 %) bytes
Max Waves: 964785 -> 963977 (-0.08 %)

Totals from affected shaders:
SGPRS: 1105800 -> 1108992 (0.29 %)
VGPRS: 635292 -> 637564 (0.36 %)
Spilled SGPRs: 3193 -> 3270 (2.41 %)
Spilled VGPRs: 33 -> 44 (33.33 %)
Scratch size: 36 -> 56 (55.56 %) dwords per thread
Code Size: 31568708 -> 31548988 (-0.06 %) bytes
Max Waves: 319991 -> 319183 (-0.25 %)

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6596>
This commit is contained in:
parent
a3512ddfdf
commit
57bf4c2028
@ -11,12 +11,12 @@ traces:
|
||||
- path: gputest/furmark.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: de674022e53fc9e0a9eb217f8bf0fe03
|
||||
checksum: af6e1faf11407a7e7c416f2c532de029
|
||||
# Note: Requires GL3.3
|
||||
- path: gputest/gimark.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: 2cae8e2104356e2b3017cbd953cf7b4a
|
||||
checksum: 47419914b87422b267e20b6981a7eb43
|
||||
- path: gputest/pixmark-julia-fp32.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
@ -37,16 +37,16 @@ traces:
|
||||
expectations:
|
||||
# Looks fine, but totally different shape from the rendering on i965.
|
||||
- device: freedreno-a630
|
||||
checksum: 86d678c70b8adf27095ace1a6bbfe2d2
|
||||
checksum: 9ee5a036510be0f506705eacc1516bf3
|
||||
- path: gputest/plot3d.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: 67a9eb692e694b11107860bbcd47d493
|
||||
checksum: 42aba3ab943dae2fe952cae1ff91c354
|
||||
# Note: Requires GL4 for tess.
|
||||
- path: gputest/tessmark.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: 985e231b58b7dc4b6da34ff32f8ebb82
|
||||
checksum: 8688b3904b6b2bc591d8b669ecae4d53
|
||||
- path: gputest/triangle.trace
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
@ -149,7 +149,7 @@ traces:
|
||||
- path: glmark2/effect2d-kernel=1,1,1,1,1;1,1,1,1,1;1,1,1,1,1;.rdc
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: 2346a6597f4d1f20b493e8d6a8f7e592
|
||||
checksum: 2964d37446db126a5fe462b1ba4542cd
|
||||
- path: glmark2/function-fragment-complexity=low:fragment-steps=5.rdc
|
||||
expectations:
|
||||
# Incorrect rendering, a bunch of the area is uniform gray when it should
|
||||
@ -215,7 +215,7 @@ traces:
|
||||
- path: glmark2/shading-shading=gouraud.rdc
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: fcc26fca31375b216382e69bc5f113fb
|
||||
checksum: bd9058f041bd2d59c039cccdb7d50bf7
|
||||
- path: glmark2/shading-shading=phong.rdc
|
||||
# Some speckling on the main specular highlight that may just be
|
||||
# mediump artifacts
|
||||
@ -226,11 +226,6 @@ traces:
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: d8b5931669733240797f1acf5d98db25
|
||||
# Very yellow terrain compared to i965, may just be mediump artifacts.
|
||||
- path: glmark2/terrain.rdc
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
checksum: 114f7dfe97768d9c565a29f656c8f9cf
|
||||
- path: glmark2/texture-texture-filter=linear.rdc
|
||||
expectations:
|
||||
- device: freedreno-a630
|
||||
|
@ -33,11 +33,11 @@ traces:
|
||||
- path: gputest/furmark.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 1c569668d608c644f353caa177d577c6
|
||||
checksum: d71c0d8e6c46c8f29d1aa8d0ed7d3c87
|
||||
- path: gputest/pixmark-piano.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: a0e1d6358f76666603b08eab383af080
|
||||
checksum: 777d48e82cabceef6d9489189f91d266
|
||||
- path: gputest/triangle.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
@ -153,7 +153,7 @@ traces:
|
||||
- path: glmark2/shadow.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 4bf5ca9ce641de1031eb8125d80a3005
|
||||
checksum: 03dfbf026a0f0ab643e5a6ef19623e81
|
||||
- path: glmark2/terrain.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
@ -173,7 +173,7 @@ traces:
|
||||
- path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 5164e238381e7d77a64e3de771cc005f
|
||||
checksum: 990abd360dc380c95ee2645f8b402d47
|
||||
- path: gputest/gimark.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
@ -189,15 +189,15 @@ traces:
|
||||
- path: gputest/pixmark-piano.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: a0e1d6358f76666603b08eab383af080
|
||||
checksum: 777d48e82cabceef6d9489189f91d266
|
||||
- path: gputest/pixmark-volplosion.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 2fba173643c014bcfa4b31eb55a514b9
|
||||
checksum: 708f92a8ac8aef23a4a544cc5ec755d6
|
||||
- path: gputest/plot3d.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: fd367551aa74e2903e0590a893da01a6
|
||||
checksum: f9e6c1cb70add69cf2a4724800d48b25
|
||||
- path: gputest/tessmark.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
@ -229,7 +229,7 @@ traces:
|
||||
- path: supertuxkart/supertuxkart-antediluvian-abyss.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 17f4039392a65ad23133cb2cac82dba4
|
||||
checksum: a2c4c127873f93b7db4ef48ea9fb7689
|
||||
- path: supertuxkart/supertuxkart-menu.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
@ -237,4 +237,4 @@ traces:
|
||||
- path: supertuxkart/supertuxkart-ravenbridge-mansion.rdc
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 46f08af5c49d711b41d4082f8a5cf6d6
|
||||
checksum: c8f9eae92c67c7d53db4d69a703e3914
|
||||
|
@ -194,7 +194,8 @@ optimizations.extend([
|
||||
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
||||
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
||||
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
|
||||
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
|
||||
# When fuse_ffma is set, always lower inexact ffma to fmul + fadd here, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
||||
(('~ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma'),
|
||||
|
||||
(('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
|
||||
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
|
||||
@ -2027,6 +2028,7 @@ late_optimizations = [
|
||||
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
|
||||
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
|
||||
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
|
||||
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
|
||||
|
||||
# These are duplicated from the main optimizations table. The late
|
||||
# patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
|
||||
|
@ -698,6 +698,17 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||
if (changed)
|
||||
si_nir_opts(nir, false);
|
||||
|
||||
/* Run late optimizations to fuse ffma. */
|
||||
bool more_late_algebraic = true;
|
||||
while (more_late_algebraic) {
|
||||
more_late_algebraic = false;
|
||||
NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
|
||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||
NIR_PASS_V(nir, nir_copy_prop);
|
||||
NIR_PASS_V(nir, nir_opt_dce);
|
||||
NIR_PASS_V(nir, nir_opt_cse);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_bool_to_int32);
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user