summary refs log tree commit diff
diff options
context:
space:
mode:
author	Jason Ekstrand <jason.ekstrand@intel.com>	2019-02-21 10:32:01 -0600
committer	Jason Ekstrand <jason.ekstrand@intel.com>	2019-02-28 16:58:20 -0600
commit	95ae400abcda4f692fd31c9132462d904f939ec3 (patch)
tree	bf0407b0d37574601b858790144558075f80fa53
parent	aeaba24fcb98839be73a59f6bb74a39523d79a3d (diff)
intel/schedule_instructions: Move some comments
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
-rw-r--r--	src/intel/compiler/brw_schedule_instructions.cpp	| 74
1 file changed, 37 insertions(+), 37 deletions(-)
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 4a516223cf9..1d5ee56bd4a 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -368,44 +368,13 @@ schedule_node::set_latency_gen7(bool is_haswell)
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
- /* Test code:
- * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
- * mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
- * mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q };
- * send(8) g4<1>ud g112<8,8,1>ud
- * data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q };
- *
- * Running it 100 times as fragment shader on a 128x128 quad
- * gives an average latency of 13867 cycles per atomic op,
- * standard deviation 3%. Note that this is a rather
- * pessimistic estimate, the actual latency in cases with few
- * collisions between threads and favorable pipelining has been
- * seen to be reduced by a factor of 100.
- */
+ /* See GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
latency = 14000;
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
- /* Test code:
- * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
- * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
- * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
- * send(8) g4<1>UD g112<8,8,1>UD
- * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
- * .
- * . [repeats 8 times]
- * .
- * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
- * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
- * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
- * send(8) g4<1>UD g112<8,8,1>UD
- * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
- *
- * Running it 100 times as fragment shader on a 128x128 quad
- * gives an average latency of 583 cycles per surface read,
- * standard deviation 0.9%.
- */
+ /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
latency = is_haswell ? 300 : 600;
break;
@@ -460,13 +429,44 @@ schedule_node::set_latency_gen7(bool is_haswell)
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
- /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+ /* Test code:
+ * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
+ * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
+ * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
+ * send(8) g4<1>UD g112<8,8,1>UD
+ * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ * .
+ * . [repeats 8 times]
+ * .
+ * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
+ * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
+ * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
+ * send(8) g4<1>UD g112<8,8,1>UD
+ * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ *
+ * Running it 100 times as fragment shader on a 128x128 quad
+ * gives an average latency of 583 cycles per surface read,
+ * standard deviation 0.9%.
+ */
assert(!is_haswell);
latency = 600;
break;
case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
- /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+ /* Test code:
+ * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
+ * mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
+ * mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q };
+ * send(8) g4<1>ud g112<8,8,1>ud
+ * data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ *
+ * Running it 100 times as fragment shader on a 128x128 quad
+ * gives an average latency of 13867 cycles per atomic op,
+ * standard deviation 3%. Note that this is a rather
+ * pessimistic estimate, the actual latency in cases with few
+ * collisions between threads and favorable pipelining has been
+ * seen to be reduced by a factor of 100.
+ */
assert(!is_haswell);
latency = 14000;
break;
@@ -486,7 +486,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
- /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+ /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
latency = 300;
break;
@@ -497,7 +497,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
- /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+ /* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
latency = 14000;
break;