ir3: Switch tess lowering to use location

Clip & cull distances, which are compact arrays, exposed a lot of holes because they can take up multiple slots and partially overlap.

I wanted to eliminate our dependence on knowing the layout of the variables, as this can get complicated with things like partially overlapping arrays, which can happen with ARB_enhanced_layouts or with clip/cull distance arrays. This means no longer changing the layout based on whether the i/o is part of an array or not, and no longer matching producer <-> consumer based on the variables. At the end of the day we have to match things based on the user-specified location, so for simplicity this switches the entire i/o handling to be based off the user location rather than the driver location. This means that the primitive map may be a little bigger, but it reduces the complexity because we never have to build a table mapping user location to driver location, and it reduces the amount of work done at link time in the SSO case. It also brings us closer to what the other drivers do.

While here, I also fixed the handling of component qualifiers, which was another thing broken with clip/cull distances.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
author: Connor Abbott <cwabbott0@gmail.com> 2020-09-24 16:24:55 +0200
committer: Marge Bot <eric+marge@anholt.net> 2020-10-23 11:09:18 +0000
commit: 9e063b01b77059d13756794f95fa0eb0e5ef6633 (patch)
tree: 2657d66f97a3331c65cf2ec96bbf1b25e92e7286
parent: 4ca38a19957f88523aef8761d68db413ebb1d562 (diff)
6 files changed, 199 insertions, 158 deletions
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2628746a302..1ee29568790 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -433,17 +433,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 			break;
 		case MESA_SHADER_TESS_CTRL:
 			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
 			progress = true;
 			break;
 		case MESA_SHADER_TESS_EVAL:
-			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
+			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so, so->key.tessellation);
 			if (so->key.has_gs)
 				NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
 			progress = true;
 			break;
 		case MESA_SHADER_GEOMETRY:
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
 			progress = true;
 			break;
 		default:
@@ -694,12 +694,12 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 		constoff = align(constoff - 1, 4) + 3;
 		const_state->offsets.primitive_param = constoff;
 		const_state->offsets.primitive_map = constoff + 5;
-		constoff += 5 + DIV_ROUND_UP(nir->num_inputs, 4);
+		constoff += 5 + DIV_ROUND_UP(v->input_size, 4);
 		break;
 	case MESA_SHADER_GEOMETRY:
 		const_state->offsets.primitive_param = constoff;
 		const_state->offsets.primitive_map = constoff + 1;
-		constoff += 1 + DIV_ROUND_UP(nir->num_inputs, 4);
+		constoff += 1 + DIV_ROUND_UP(v->input_size, 4);
 		break;
 	default:
 		break;
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index e9fe495767b..a6ec1440e9b 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -46,9 +46,9 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
 void ir3_nir_lower_to_explicit_output(nir_shader *shader,
 		struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler);
+void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v);
 void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
+void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
@@ -65,10 +65,6 @@ bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
 
-uint32_t ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-		const struct ir3_shader_variant *consumer,
-		uint32_t *locs);
-
 static inline nir_intrinsic_instr *
 ir3_bindless_resource(nir_src src)
 {
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
index 44b2921dcb7..d54f9a4c928 100644
--- a/src/freedreno/ir3/ir3_nir_lower_tess.c
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -30,7 +30,6 @@ struct state {
 
 	struct primitive_map {
 		unsigned loc[32];
-		unsigned size[32];
 		unsigned stride;
 	} map;
 
@@ -73,45 +72,65 @@ build_local_primitive_id(nir_builder *b, struct state *state)
 	return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
 }
 
-static nir_variable *
-get_var(nir_shader *shader, nir_variable_mode mode, int driver_location)
+static bool
+is_tess_levels(gl_varying_slot slot)
 {
-	nir_foreach_variable_with_modes (v, shader, mode) {
-		if (v->data.driver_location == driver_location) {
-			return v;
-		}
-	}
-
-	return NULL;
+	return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
+			slot == VARYING_SLOT_TESS_LEVEL_INNER);
 }
 
-static bool
-is_tess_levels(nir_variable *var)
+/* Return a deterministic index for varyings. We can't rely on driver_location
+ * to be correct without linking the different stages first, so we create
+ * "primitive maps" where the producer decides on the location of each varying
+ * slot and then exports a per-slot array to the consumer. This compacts the
+ * gl_varying_slot space down a bit so that the primitive maps aren't too
+ * large.
+ *
+ * Note: per-patch varyings are currently handled separately, without any
+ * compacting.
+ *
+ * TODO: We could probably use the driver_location's directly in the non-SSO
+ * (Vulkan) case.
+ */
+
+static unsigned
+shader_io_get_unique_index(gl_varying_slot slot)
 {
-	return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
-			var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
+	if (slot == VARYING_SLOT_POS)
+		return 0;
+	if (slot == VARYING_SLOT_PSIZ)
+		return 1;
+	if (slot == VARYING_SLOT_CLIP_DIST0)
+		return 2;
+	if (slot == VARYING_SLOT_CLIP_DIST1)
+		return 3;
+	if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
+		return 4 + (slot - VARYING_SLOT_VAR0);
+	unreachable("illegal slot in get unique index\n");
 }
 
 static nir_ssa_def *
 build_local_offset(nir_builder *b, struct state *state,
-		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+		nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
 	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
 	nir_ssa_def *primitive_offset =
 		nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
 	nir_ssa_def *attr_offset;
 	nir_ssa_def *vertex_stride;
+	unsigned index = shader_io_get_unique_index(location);
 
 	switch (b->shader->info.stage) {
 	case MESA_SHADER_VERTEX:
 	case MESA_SHADER_TESS_EVAL:
 		vertex_stride = nir_imm_int(b, state->map.stride * 4);
-		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+		attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);
 		break;
 	case MESA_SHADER_TESS_CTRL:
 	case MESA_SHADER_GEOMETRY:
 		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
-		attr_offset = nir_load_primitive_location_ir3(b, base);
+		attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+							   nir_imm_int(b, comp * 4));
 		break;
 	default:
 		unreachable("bad shader stage");
@@ -120,7 +139,7 @@ build_local_offset(nir_builder *b, struct state *state,
 	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
 
 	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
-			nir_iadd(b, attr_offset, offset));
+			nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));
 }
 
 static nir_intrinsic_instr *
@@ -153,37 +172,58 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
 }
 
 static void
-build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map)
+build_primitive_map(nir_shader *shader, struct primitive_map *map)
 {
-	nir_foreach_variable_with_modes (var, shader, mode) {
-		switch (var->data.location) {
-		case VARYING_SLOT_TESS_LEVEL_OUTER:
-		case VARYING_SLOT_TESS_LEVEL_INNER:
-			continue;
-		}
-
-		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
-
-		assert(var->data.driver_location < ARRAY_SIZE(map->size));
-		map->size[var->data.driver_location] =
-			MAX2(map->size[var->data.driver_location], size);
+	/* All interfaces except the TCS <-> TES interface use ldlw, which takes
+	 * an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses
+	 * ldg, which takes an offset in dwords, but each per-vertex slot has
+	 * space for every vertex, and there's space at the beginning for
+	 * per-patch varyings.
+	 */
+	unsigned slot_size = 16, start = 0;
+	if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
+		slot_size = shader->info.tess.tcs_vertices_out * 4;
+		start = util_last_bit(shader->info.patch_outputs_written) * 4;
 	}
 
-	unsigned loc = 0;
-	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
-		if (map->size[i] == 0)
-				continue;
-		nir_variable *var = get_var(shader, mode, i);
-		map->loc[i] = loc;
-		loc += map->size[i];
-
-		if (var->data.patch)
-			map->size[i] = 0;
-		else
-			map->size[i] = map->size[i] / glsl_get_length(var->type);
+	uint64_t mask = shader->info.outputs_written;
+	unsigned loc = start;
+	while (mask) {
+		int location = u_bit_scan64(&mask);
+		if (is_tess_levels(location))
+			continue;
+
+		unsigned index = shader_io_get_unique_index(location);
+		map->loc[index] = loc;
+		loc += slot_size;
 	}
 
 	map->stride = loc;
+	/* Use units of dwords for the stride. */
+	if (shader->info.stage != MESA_SHADER_TESS_CTRL)
+		map->stride /= 4;
+}
+
+/* For shader stages that receive a primitive map, calculate how big it should
+ * be.
+ */
+
+static unsigned
+calc_primitive_map_size(nir_shader *shader)
+{
+	uint64_t mask = shader->info.inputs_read;
+	unsigned max_index = 0;
+	while (mask) {
+		int location = u_bit_scan64(&mask);
+
+		if (is_tess_levels(location))
+			continue;
+
+		unsigned index = shader_io_get_unique_index(location);
+		max_index = MAX2(max_index, index + 1);
+	}
+	
+	return max_index;
 }
 
 static void
@@ -209,7 +249,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
 			b->cursor = nir_instr_remove(&intr->instr);
 
 			nir_ssa_def *vertex_id = build_vertex_id(b, state);
-			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+			nir_ssa_def *offset = build_local_offset(b, state, vertex_id,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
 					intr->src[1].ssa);
 			nir_intrinsic_instr *store =
 				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
@@ -240,7 +282,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *
 {
 	struct state state = { };
 
-	build_primitive_map(shader, nir_var_shader_out, &state.map);
+	build_primitive_map(shader, &state.map);
 	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -282,7 +324,8 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 
 			nir_ssa_def *offset = build_local_offset(b, state,
 					intr->src[0].ssa, // this is typically gl_InvocationID
-					nir_intrinsic_base(intr),
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
 					intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
@@ -305,14 +348,14 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 }
 
 void
-ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
+ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v)
 {
  	struct state state = { };
 
 	/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
 	 * HS uses a different primitive id, which starts at bit 16 in the header
 	 */
-	if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
+	if (shader->info.stage == MESA_SHADER_TESS_CTRL && v->shader->compiler->tess_use_shared)
 		state.local_primitive_id_start = 16;
 
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -329,43 +372,74 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compile
 
 	nir_foreach_block_safe (block, impl)
 		lower_block_to_explicit_input(block, &b, &state);
+
+	v->input_size = calc_primitive_map_size(shader);
 }
 
+static nir_ssa_def *
+build_tcs_out_vertices(nir_builder *b)
+{
+	if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
+		return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
+	else
+		return nir_load_patch_vertices_in(b);
+}
 
 static nir_ssa_def *
 build_per_vertex_offset(nir_builder *b, struct state *state,
-		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+		nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
 	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
 	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
 	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
 	nir_ssa_def *attr_offset;
-	int loc = var->data.driver_location;
 
-	switch (b->shader->info.stage) {
-	case MESA_SHADER_TESS_CTRL:
-		attr_offset = nir_imm_int(b, state->map.loc[loc]);
-		break;
-	case MESA_SHADER_TESS_EVAL:
-		attr_offset = nir_load_primitive_location_ir3(b, loc);
-		break;
-	default:
-		unreachable("bad shader state");
+	if (nir_src_is_const(nir_src_for_ssa(offset))) {
+		location += nir_src_as_uint(nir_src_for_ssa(offset));
+		offset = nir_imm_int(b, 0);
+	} else {
+		/* Offset is in vec4's, but we need it in unit of components for the
+		 * load/store_global_ir3 offset.
+		 */
+		offset = nir_ishl(b, offset, nir_imm_int(b, 2));
 	}
 
-	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
-	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+	nir_ssa_def *vertex_offset;
+	if (vertex) {
+		unsigned index = shader_io_get_unique_index(location);
+		switch (b->shader->info.stage) {
+		case MESA_SHADER_TESS_CTRL:
+			attr_offset = nir_imm_int(b, state->map.loc[index] + comp);
+			break;
+		case MESA_SHADER_TESS_EVAL:
+			attr_offset =
+				nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+						 nir_imm_int(b, comp));
+			break;
+		default:
+			unreachable("bad shader state");
+		}
+
+		attr_offset = nir_iadd(b, attr_offset,
+							   nir_imul24(b, offset,
+										  build_tcs_out_vertices(b)));
+		vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));
+	} else {
+		assert(location >= VARYING_SLOT_PATCH0 &&
+			   location <= VARYING_SLOT_TESS_MAX);
+		unsigned index = location - VARYING_SLOT_PATCH0;
+		attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);
+		vertex_offset = nir_imm_int(b, 0);
+	}
 
-	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
-			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);
 }
 
 static nir_ssa_def *
-build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+build_patch_offset(nir_builder *b, struct state *state,
+		uint32_t base, uint32_t comp, nir_ssa_def *offset)
 {
-	debug_assert(var && var->data.patch);
-
-	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+	return build_per_vertex_offset(b, state, NULL, base, comp, offset);
 }
 
 static void
@@ -444,9 +518,11 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[0].ssa, intr->src[1].ssa, var);
+					intr->src[0].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+				   	intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
 			break;
@@ -462,12 +538,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
 			nir_ssa_def *value = intr->src[0].ssa;
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[1].ssa, intr->src[2].ssa, var);
+					intr->src[1].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+					intr->src[2].ssa);
 
-			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-					nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address, offset);
 
 			break;
 		}
@@ -475,8 +552,6 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 		case nir_intrinsic_load_output: {
 			// src[] = { offset }.
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address, *offset;
@@ -486,13 +561,17 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			 * are never used. most likely some issue with (sy) not properly
 			 * syncing with values coming from a second memory transaction.
 			 */
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				assert(intr->dest.ssa.num_components == 1);
 				address = nir_load_tess_factor_base_ir3(b);
-				offset = build_tessfactor_base(b, var->data.location, state);
+				offset = build_tessfactor_base(b, location, state);
 			} else {
 				address = nir_load_tess_param_base_ir3(b);
-				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+				offset = build_patch_offset(b, state,
+											location,
+											nir_intrinsic_component(intr),
+											intr->src[0].ssa);
 			}
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
@@ -504,14 +583,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
 			/* write patch output to bo */
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			/* sparse writemask not supported */
 			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
 
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				/* with tess levels are defined as float[4] and float[2],
 				 * but tess factor BO has smaller sizes for tris/isolines,
 				 * so we have to discard any writes beyond the number of
@@ -519,7 +597,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 				uint32_t inner_levels, outer_levels, levels;
 				tess_level_components(state, &inner_levels, &outer_levels);
 
-				if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+				if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
 					levels = outer_levels;
 				else
 					levels = inner_levels;
@@ -534,12 +612,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
 						intr->src[0].ssa,
 						nir_load_tess_factor_base_ir3(b),
-						nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));
+						nir_iadd(b, offset, build_tessfactor_base(b, location, state)));
 
 				nir_pop_if(b, nif);
 			} else {
 				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+				nir_ssa_def *offset = build_patch_offset(b, state, 
+														 location,
+														 nir_intrinsic_component(intr),
+														 intr->src[1].ssa);
 
 				debug_assert(nir_intrinsic_component(intr) == 0);
 
@@ -580,7 +661,7 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
 		nir_print_shader(shader, stderr);
 	}
 
-	build_primitive_map(shader, nir_var_shader_out, &state.map);
+	build_primitive_map(shader, &state.map);
 	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 	v->output_size = state.map.stride;
 
@@ -672,9 +753,11 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[0].ssa, intr->src[1].ssa, var);
+					intr->src[0].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+				   	intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
 			break;
@@ -683,10 +766,6 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 		case nir_intrinsic_load_input: {
 			// src[] = { offset }.
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
-
-			debug_assert(var->data.patch);
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address, *offset;
@@ -696,13 +775,17 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 			 * are never used. most likely some issue with (sy) not properly
 			 * syncing with values coming from a second memory transaction.
 			 */
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				assert(intr->dest.ssa.num_components == 1);
 				address = nir_load_tess_factor_base_ir3(b);
-				offset = build_tessfactor_base(b, var->data.location, state);
+				offset = build_tessfactor_base(b, location, state);
 			} else {
 				address = nir_load_tess_param_base_ir3(b);
-				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+				offset = build_patch_offset(b, state,
+											location,
+											nir_intrinsic_component(intr),
+											intr->src[0].ssa);
 			}
 
 			offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));
@@ -718,7 +801,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 }
 
 void
-ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology)
 {
 	struct state state = { .topology = topology };
 
@@ -728,9 +811,6 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
 		nir_print_shader(shader, stderr);
 	}
 
-	/* Build map of inputs so we have the sizes. */
-	build_primitive_map(shader, nir_var_shader_in, &state.map);
-
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 	assert(impl);
 
@@ -740,6 +820,8 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
 	nir_foreach_block_safe (block, impl)
 		lower_tess_eval_block(block, &b, &state);
 
+	v->input_size = calc_primitive_map_size(shader);
+
 	nir_metadata_preserve(impl, 0);
 }
 
@@ -804,8 +886,6 @@ ir3_nir_lower_gs(nir_shader *shader)
 		nir_print_shader(shader, stderr);
 	}
 
-	build_primitive_map(shader, nir_var_shader_in, &state.map);
-
 	/* Create an output var for vertex_flags. This will be shadowed below,
 	 * same way regular outputs get shadowed, and this variable will become a
 	 * temporary.
@@ -914,38 +994,3 @@ ir3_nir_lower_gs(nir_shader *shader)
 	}
 }
 
-uint32_t
-ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-		const struct ir3_shader_variant *consumer,
-		uint32_t *locs)
-{
-	uint32_t num_loc = 0, factor;
-
-	switch (consumer->type) {
-	case MESA_SHADER_TESS_CTRL:
-	case MESA_SHADER_GEOMETRY:
-		/* These stages load with ldlw, which expects byte offsets. */
-		factor = 4;
-		break;
-	case MESA_SHADER_TESS_EVAL:
-		/* The tess eval shader uses ldg, which takes dword offsets. */
-		factor = 1;
-		break;
-	default:
-		unreachable("bad shader stage");
-	}
-
-	nir_foreach_shader_in_variable(in_var, consumer->shader->nir) {
-		nir_foreach_shader_out_variable(out_var, producer->shader->nir) {
-			if (in_var->data.location == out_var->data.location) {
-				locs[in_var->data.driver_location] =
-					producer->output_loc[out_var->data.driver_location] * factor;
-
-				debug_assert(num_loc <= in_var->data.driver_location + 1);
-				num_loc = in_var->data.driver_location + 1;
-			}
-		}
-	}
-
-	return num_loc;
-}
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 39870066956..f78d8026e2a 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -570,7 +570,13 @@ struct ir3_shader_variant {
 	/* Size in dwords of all outputs for VS, size of entire patch for HS. */
 	uint32_t output_size;
 
-	/* Map from driver_location to byte offset in per-primitive storage */
+	/* Expected size of incoming output_loc for HS, DS, and GS */
+	uint32_t input_size;
+
+	/* Map from location to offset in per-primitive storage. In dwords for
+	 * HS, where varyings are read in the next stage via ldg with a dword
+	 * offset, and in bytes for all other stages.
+	 */
 	unsigned output_loc[32];
 
 	/* attributes (VS) / varyings (FS):
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index c2cc4dbd009..c1e6000c138 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -686,7 +686,7 @@ tu6_setup_streamout(struct tu_cs *cs,
 static void
 tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
                enum a6xx_state_block block, uint32_t offset,
-               uint32_t size, uint32_t *dwords) {
+               uint32_t size, const uint32_t *dwords) {
    assert(size % 4 == 0);
 
    tu_cs_emit_pkt7(cs, opcode, 3 + size);
@@ -711,16 +711,14 @@ tu6_emit_link_map(struct tu_cs *cs,
 {
    const struct ir3_const_state *const_state = ir3_const_state(consumer);
    uint32_t base = const_state->offsets.primitive_map;
-   uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-   num_loc = ir3_link_geometry_stages(producer, consumer, patch_locs);
-   int size = DIV_ROUND_UP(num_loc, 4);
+   int size = DIV_ROUND_UP(consumer->input_size, 4);
 
    size = (MIN2(size + base, consumer->constlen) - base) * 4;
    if (size <= 0)
       return;
 
    tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
-                         patch_locs);
+                         producer->output_loc);
 }
 
 static uint16_t
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h
index 5e79661c5bf..4784ac673bc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_const.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h
@@ -308,11 +308,7 @@ ir3_emit_link_map(struct fd_screen *screen,
 {
 	const struct ir3_const_state *const_state = ir3_const_state(v);
 	uint32_t base = const_state->offsets.primitive_map;
-	uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-
-	num_loc = ir3_link_geometry_stages(producer, v, patch_locs);
-
-	int size = DIV_ROUND_UP(num_loc, 4);
+	int size = DIV_ROUND_UP(v->input_size, 4);
 
 	/* truncate size to avoid writing constants that shader
 	 * does not use:
@@ -324,7 +320,7 @@ ir3_emit_link_map(struct fd_screen *screen,
 	size *= 4;
 
 	if (size > 0)
-		emit_const_user(ring, v, base, size, patch_locs);
+		emit_const_user(ring, v, base, size, producer->output_loc);
 }
 
 /* emit stream-out buffers: */
author	Connor Abbott <cwabbott0@gmail.com>	2020-09-24 16:24:55 +0200
committer	Marge Bot <eric+marge@anholt.net>	2020-10-23 11:09:18 +0000
commit	9e063b01b77059d13756794f95fa0eb0e5ef6633 (patch)
tree	2657d66f97a3331c65cf2ec96bbf1b25e92e7286
parent	4ca38a19957f88523aef8761d68db413ebb1d562 (diff)