summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <alyssa@collabora.com>2022-02-18 19:20:27 -0500
committerDylan Baker <dylan.c.baker@intel.com>2022-02-24 14:56:52 -0800
commit60ad4707d4834e4c76e62a0934568dab3e7d6409 (patch)
treeaf63cf58ff669ebfe29754acbe1855fee6fdc5c4
parenta7364245060c5410a8f3e814550bf3a2528437b6 (diff)
pan/bi: Specialize IDVS in NIR
It's a bit more code, but it's needed to chew through control flow since we don't have a backend version of dead_cf. Results are really good, meaning I really screwed this up the first time around (hence the cc mesa-stable). total instructions in shared programs: 1963576 -> 1939513 (-1.23%) instructions in affected programs: 671053 -> 646990 (-3.59%) helped: 4436 HURT: 729 helped stats (abs) min: 1.0 max: 43.0 x̄: 5.75 x̃: 6 helped stats (rel) min: 0.21% max: 100.00% x̄: 6.47% x̃: 5.17% HURT stats (abs) min: 1.0 max: 22.0 x̄: 2.01 x̃: 1 HURT stats (rel) min: 0.50% max: 50.00% x̄: 10.45% x̃: 9.09% 95% mean confidence interval for instructions value: -4.77 -4.55 95% mean confidence interval for instructions %-change: -4.36% -3.80% Instructions are helped. total tuples in shared programs: 1533335 -> 1523194 (-0.66%) tuples in affected programs: 483167 -> 473026 (-2.10%) helped: 3414 HURT: 1288 helped stats (abs) min: 1.0 max: 20.0 x̄: 3.73 x̃: 2 helped stats (rel) min: 0.27% max: 100.00% x̄: 4.87% x̃: 3.03% HURT stats (abs) min: 1.0 max: 19.0 x̄: 2.02 x̃: 1 HURT stats (rel) min: 0.24% max: 38.10% x̄: 8.10% x̃: 5.88% 95% mean confidence interval for tuples value: -2.28 -2.03 95% mean confidence interval for tuples %-change: -1.62% -1.02% Tuples are helped. total clauses in shared programs: 351432 -> 329158 (-6.34%) clauses in affected programs: 142237 -> 119963 (-15.66%) helped: 5328 HURT: 3 helped stats (abs) min: 1.0 max: 43.0 x̄: 4.18 x̃: 4 helped stats (rel) min: 0.74% max: 100.00% x̄: 19.44% x̃: 17.24% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 9.09% max: 12.50% x̄: 10.90% x̃: 11.11% 95% mean confidence interval for clauses value: -4.25 -4.11 95% mean confidence interval for clauses %-change: -19.72% -19.12% Clauses are helped. 
total cycles in shared programs: 202830.92 -> 172084.50 (-15.16%) cycles in affected programs: 117078.42 -> 86332 (-26.26%) helped: 5450 HURT: 1 helped stats (abs) min: 0.083333 max: 49.0 x̄: 5.64 x̃: 5 helped stats (rel) min: 1.42% max: 100.00% x̄: 27.94% x̃: 25.64% HURT stats (abs) min: 0.25 max: 0.25 x̄: 0.25 x̃: 0 HURT stats (rel) min: 2.46% max: 2.46% x̄: 2.46% x̃: 2.46% 95% mean confidence interval for cycles value: -5.74 -5.54 95% mean confidence interval for cycles %-change: -28.30% -27.58% Cycles are helped. total arith in shared programs: 57274.29 -> 57145.04 (-0.23%) arith in affected programs: 16418.33 -> 16289.08 (-0.79%) helped: 2442 HURT: 1784 helped stats (abs) min: 0.041665999999999315 max: 0.75 x̄: 0.14 x̃: 0 helped stats (rel) min: 0.23% max: 100.00% x̄: 5.51% x̃: 2.87% HURT stats (abs) min: 0.041665999999999315 max: 0.9166670000000003 x̄: 0.12 x̃: 0 HURT stats (rel) min: 0.00% max: 100.00% x̄: 25.13% x̃: 9.09% 95% mean confidence interval for arith value: -0.04 -0.03 95% mean confidence interval for arith %-change: 6.61% 8.24% Inconclusive result (value mean confidence interval and %-change mean confidence interval disagree). total texture in shared programs: 12857 -> 12857 (0.00%) texture in affected programs: 0 -> 0 helped: 0 HURT: 0 total vary in shared programs: 11157.75 -> 11157.75 (0.00%) vary in affected programs: 0 -> 0 helped: 0 HURT: 0 total ldst in shared programs: 177208 -> 146420 (-17.37%) ldst in affected programs: 117098 -> 86310 (-26.29%) helped: 5447 HURT: 0 helped stats (abs) min: 1.0 max: 49.0 x̄: 5.65 x̃: 5 helped stats (rel) min: 1.92% max: 100.00% x̄: 27.91% x̃: 25.64% 95% mean confidence interval for ldst value: -5.75 -5.55 95% mean confidence interval for ldst %-change: -28.27% -27.56% Ldst are helped. 
total quadwords in shared programs: 1436507 -> 1398329 (-2.66%) quadwords in affected programs: 515101 -> 476923 (-7.41%) helped: 5150 HURT: 111 helped stats (abs) min: 1.0 max: 39.0 x̄: 7.46 x̃: 6 helped stats (rel) min: 0.17% max: 100.00% x̄: 10.02% x̃: 8.24% HURT stats (abs) min: 1.0 max: 9.0 x̄: 2.01 x̃: 1 HURT stats (rel) min: 0.43% max: 21.62% x̄: 3.57% x̃: 1.94% 95% mean confidence interval for quadwords value: -7.41 -7.11 95% mean confidence interval for quadwords %-change: -9.98% -9.49% Quadwords are helped. total threads in shared programs: 35025 -> 35228 (0.58%) threads in affected programs: 218 -> 421 (93.12%) helped: 208 HURT: 5 helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for threads value: 0.91 0.99 95% mean confidence interval for threads %-change: 93.40% 99.55% Threads are helped. total loops in shared programs: 128 -> 125 (-2.34%) loops in affected programs: 3 -> 0 helped: 3 HURT: 0 helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% total spills in shared programs: 158 -> 149 (-5.70%) spills in affected programs: 15 -> 6 (-60.00%) helped: 9 HURT: 0 total fills in shared programs: 1133 -> 966 (-14.74%) fills in affected programs: 197 -> 30 (-84.77%) helped: 9 HURT: 0 Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15090> (cherry picked from commit e0e63c2a8e6dba9d5806aebe355f16a0431fe64b)
-rw-r--r--.pick_status.json2
-rw-r--r--src/panfrost/bifrost/bifrost_compile.c58
2 files changed, 51 insertions, 9 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 5fc006e5db1..e1786948c79 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -157,7 +157,7 @@
"description": "pan/bi: Specialize IDVS in NIR",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null
},
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 608f9af9657..d16ba005fad 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -695,6 +695,27 @@ bi_should_remove_store(nir_intrinsic_instr *intr, enum bi_idvs_mode idvs)
}
}
+static bool
+bifrost_nir_specialize_idvs(nir_builder *b, nir_instr *instr, void *data)
+{
+ enum bi_idvs_mode *idvs = data;
+
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ if (bi_should_remove_store(intr, *idvs)) {
+ nir_instr_remove(instr);
+ return true;
+ }
+
+ return false;
+}
+
static void
bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
{
@@ -710,12 +731,6 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
unsigned imm_index = 0;
bool immediate = bi_is_intr_immediate(instr, &imm_index, 16);
- /* Skip stores to the wrong kind of variable in a specialized IDVS
- * shader. Backend dead code elimination will clean up the mess.
- */
- if (bi_should_remove_store(instr, b->shader->idvs))
- return;
-
/* Only look at the total components needed. In effect, we fill in all
* the intermediate "holes" in the write mask, since we can't mask off
* stores. Since nir_lower_io_to_temporaries ensures each varying is
@@ -3498,8 +3513,6 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
NIR_PASS_V(nir, nir_shader_instructions_pass,
nir_invalidate_divergence, nir_metadata_all, NULL);
}
-
- NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}
/* The cmdstream lowers 8-bit fragment output as 16-bit, so we need to do the
@@ -3762,6 +3775,35 @@ bi_compile_variant_nir(nir_shader *nir,
ctx->info = info;
ctx->idvs = idvs;
+ if (idvs != BI_IDVS_NONE) {
+ /* Specializing shaders for IDVS is destructive, so we need to
+ * clone. However, the last (second) IDVS shader does not need
+ * to be preserved so we can skip cloning that one.
+ */
+ if (offset == 0)
+ ctx->nir = nir = nir_shader_clone(ctx, nir);
+
+ NIR_PASS_V(nir, nir_shader_instructions_pass,
+ bifrost_nir_specialize_idvs,
+ nir_metadata_block_index | nir_metadata_dominance,
+ &idvs);
+
+ /* After specializing, clean up the mess */
+ bool progress = true;
+
+ while (progress) {
+ progress = false;
+
+ NIR_PASS(progress, nir, nir_opt_dce);
+ NIR_PASS(progress, nir, nir_opt_dead_cf);
+ }
+ }
+
+ /* We can only go out-of-SSA after specializing IDVS, as opt_dead_cf
+ * doesn't know how to deal with nir_register.
+ */
+ NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
/* If nothing is pushed, all UBOs need to be uploaded */
ctx->ubo_mask = ~0;