summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/svga
diff options
context:
space:
mode:
authorBrian Paul <brianp@vmware.com>2014-04-17 08:54:47 -0700
committerBrian Paul <brianp@vmware.com>2014-04-17 11:29:33 -0700
commit615a356ee38d882e9f073dba0b8918a903094124 (patch)
treeb1950dc8b9eb0c612161ff371ef280dc083cd119 /src/gallium/drivers/svga
parent52faafa17424a3842f150019a9d76ce626019d9f (diff)
svga: implement support for signed byte vertex attributes
There's no SVGA3D_DECLTYPE that directly corresponds to PIPE_FORMAT_R8G8B8_SNORM. Previously, we used the swtnl fallback path to handle this but that's slow and causes invariance issues. Now we fetch the attribute as SVGA3D_DECLTYPE_UBYTE4N and insert some extra VS instructions to remap the attributes from the range [0,1] to the range [-1,1]. Fixes Sauerbraten sw fallback. Fixes piglit normal3b3s-invariance test. Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Diffstat (limited to 'src/gallium/drivers/svga')
-rw-r--r--src/gallium/drivers/svga/svga_context.h1
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c29
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c4
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.h1
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c128
6 files changed, 151 insertions, 14 deletions
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 79cecbf3221..55642773f89 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -201,6 +201,7 @@ struct svga_velems_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
+ unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
};
/* Use to calculate differences between state emitted to hardware and
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index e34f3a00eeb..d679ad3bdf3 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -94,6 +94,9 @@ translate_vertex_format(enum pipe_format format)
case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2;
case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4;
+ /* See attrib_needs_range_adjustment() below */
+ case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N;
+
default:
/* There are many formats without hardware support. This case
* will be hit regularly, meaning we'll need swvfetch.
@@ -103,6 +106,23 @@ translate_vertex_format(enum pipe_format format)
}
+/**
+ * Does the given vertex attrib format need range adjustment in the VS?
+ * Range adjustment scales and biases values from [0,1] to [-1,1].
+ * This lets us avoid the swtnl path.
+ *
+ * translate_vertex_format() maps PIPE_FORMAT_R8G8B8_SNORM to
+ * SVGA3D_DECLTYPE_UBYTE4N, so that format is the one reported here.
+ *
+ * Returns TRUE for formats that need the adjustment, FALSE for all
+ * other formats.
+ */
+static boolean
+attrib_needs_range_adjustment(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R8G8B8_SNORM: /* signed bytes fetched as UBYTE4N */
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+
static void *
svga_create_vertex_elements_state(struct pipe_context *pipe,
unsigned count,
@@ -117,9 +137,16 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
velems->count = count;
memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ velems->adjust_attrib_range = 0x0;
+
/* Translate Gallium vertex format to SVGA3dDeclType */
for (i = 0; i < count; i++) {
- velems->decl_type[i] = translate_vertex_format(attribs[i].src_format);
+ enum pipe_format f = attribs[i].src_format;
+ velems->decl_type[i] = translate_vertex_format(f);
+
+ if (attrib_needs_range_adjustment(f)) {
+ velems->adjust_attrib_range |= (1 << i);
+ }
}
}
return velems;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 2f130aec5b4..2ea25495b20 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -159,6 +159,9 @@ make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
/* SVGA_NEW_FS */
key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+ /* SVGA_NEW_VELEMENT */
+ key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
}
@@ -248,6 +251,7 @@ struct svga_tracked_state svga_hw_vs =
(SVGA_NEW_VS |
SVGA_NEW_FS |
SVGA_NEW_PRESCALE |
+ SVGA_NEW_VELEMENT |
SVGA_NEW_NEED_SWTNL),
emit_hw_vs
};
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 4fe88b3b70d..cb40560242a 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -49,6 +49,7 @@ struct svga_vs_compile_key
unsigned fs_generic_inputs;
unsigned need_prescale:1;
unsigned allow_psiz:1;
+ unsigned adjust_attrib_range:16;
};
struct svga_fs_compile_key
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 1a9731ffde8..1894296e6d7 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -86,7 +86,7 @@ struct svga_shader_emitter
boolean in_main_func;
boolean created_common_immediate;
- int common_immediate_idx;
+ int common_immediate_idx[2];
boolean created_loop_const;
int loop_const_idx;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index d3570582404..e798b17daa7 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -859,8 +859,20 @@ create_common_immediate( struct svga_shader_emitter *emit )
if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
idx, 0.0f, 0.5f, -1.0f, 1.0f ))
return FALSE;
+ emit->common_immediate_idx[0] = idx;
+ idx++;
+
+ /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
+ if (emit->key.vkey.adjust_attrib_range) {
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+ idx, 2.0f, 0.0f, 0.0f, 0.0f ))
+ return FALSE;
+ emit->common_immediate_idx[1] = idx;
+ }
+ else {
+ emit->common_immediate_idx[1] = -1;
+ }
- emit->common_immediate_idx = idx;
emit->created_common_immediate = TRUE;
return TRUE;
@@ -889,7 +901,7 @@ common_immediate_swizzle(float value)
/**
- * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5
+ * Returns an immediate reg where all the terms are either 0, 0.5, -1 or 1
*/
static struct src_register
get_immediate(struct svga_shader_emitter *emit,
@@ -900,8 +912,8 @@ get_immediate(struct svga_shader_emitter *emit,
unsigned sz = common_immediate_swizzle(z);
unsigned sw = common_immediate_swizzle(w);
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
- return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+ assert(emit->common_immediate_idx[0] >= 0);
+ return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
sx, sy, sz, sw);
}
@@ -913,9 +925,9 @@ static struct src_register
get_zero_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
+ assert(emit->common_immediate_idx[0] >= 0);
return swizzle(src_register( SVGA3DREG_CONST,
- emit->common_immediate_idx),
+ emit->common_immediate_idx[0]),
0, 0, 0, 0);
}
@@ -927,9 +939,9 @@ static struct src_register
get_one_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
+ assert(emit->common_immediate_idx[0] >= 0);
return swizzle(src_register( SVGA3DREG_CONST,
- emit->common_immediate_idx),
+ emit->common_immediate_idx[0]),
3, 3, 3, 3);
}
@@ -941,13 +953,28 @@ static struct src_register
get_half_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
- return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+ assert(emit->common_immediate_idx[0] >= 0);
+ return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
1, 1, 1, 1);
}
/**
+ * returns {2, 2, 2, 2} immediate
+ *
+ * The value comes from the second common immediate
+ * (common_immediate_idx[1]), which create_common_immediate() defines as
+ * {2, 0, 0, 0}; the 0,0,0,0 swizzle replicates its first component.
+ *
+ * create_common_immediate() only emits that constant when the VS key's
+ * adjust_attrib_range mask is non-zero; otherwise the index is -1 and
+ * the assertion below fires.
+ */
+static struct src_register
+get_two_immediate( struct svga_shader_emitter *emit )
+{
+ /* Note we use the second common immediate here */
+ assert(emit->created_common_immediate);
+ assert(emit->common_immediate_idx[1] >= 0);
+ return swizzle(src_register( SVGA3DREG_CONST,
+ emit->common_immediate_idx[1]),
+ 0, 0, 0, 0);
+}
+
+
+/**
* returns the loop const
*/
static struct src_register
@@ -3498,6 +3525,74 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
/**
+ * Emit code to remap vertex attributes from the range [0,1] to [-1,1].
+ *
+ * One attribute is processed for each bit set in the VS compile key's
+ * adjust_attrib_range mask.  For each such attribute a new hardware temp
+ * register is taken (nr_hw_temp is incremented), the adjusted value is
+ * computed into it, and the attribute's input_map[] entry is redirected
+ * to that temp.
+ *
+ * This uses get_two_immediate() and get_half_immediate(), both of which
+ * assert that the common immediates have already been created.
+ *
+ * NOTE(review): the loop relies on u_bit_scan() clearing the bit whose
+ * index it returns -- confirm against its definition.
+ *
+ * Returns FALSE if emitting any instruction fails, TRUE otherwise.
+ */
+static boolean
+emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
+{
+ unsigned adjust_attrib_range = emit->key.vkey.adjust_attrib_range;
+
+ while (adjust_attrib_range) {
+ /* The vertex input/attribute is supposed to be a signed value in
+ * the range [-1,1] but we actually fetched/converted it to the
+ * range [0,1]. This most likely happens when the app specifies a
+ * signed byte attribute but we interpreted it as unsigned bytes.
+ * See also translate_vertex_format() in svga_pipe_vertex.c.
+ *
+ * Here, we emit some extra instructions to adjust
+ * the attribute values from [0,1] to [-1,1].
+ *
+ * The adjustment we implement is:
+ * new_attrib = attrib * 2.0;
+ * if (attrib >= 0.5)
+ * new_attrib = new_attrib - 2.0;
+ * This isn't exactly right (it's off by a bit or so) but close enough.
+ */
+ const unsigned index = u_bit_scan(&adjust_attrib_range);
+ struct src_register tmp;
+
+ SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
+
+ /* allocate a temp reg (kept for the rest of the shader, since
+ * input_map[index] is pointed at it below)
+ */
+ tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
+ emit->nr_hw_temp++;
+
+ /* tmp = attrib * 2.0 */
+ if (!submit_op2(emit,
+ inst_token(SVGA3DOP_MUL),
+ dst(tmp),
+ emit->input_map[index],
+ get_two_immediate(emit)))
+ return FALSE;
+
+ /* pred = (attrib >= 0.5) */
+ if (!submit_op2(emit,
+ inst_token_setp(SVGA3DOPCOMP_GE),
+ pred_reg,
+ emit->input_map[index], /* vert attrib */
+ get_half_immediate(emit))) /* 0.5 */
+ return FALSE;
+
+ /* sub(pred) tmp, tmp, 2.0 */
+ if (!submit_op3(emit,
+ inst_token_predicated(SVGA3DOP_SUB),
+ dst(tmp),
+ src(pred_reg),
+ tmp,
+ get_two_immediate(emit)))
+ return FALSE;
+
+ /* Reassign the input_map entry to the new tmp register */
+ emit->input_map[index] = tmp;
+ }
+
+ return TRUE;
+}
+
+
+/**
* Determine if we need to create the "common" immediate value which is
* used for generating useful vector constants such as {0,0,0,0} and
* {1,1,1,1}.
@@ -3542,10 +3637,11 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
return TRUE;
}
}
-
- if (emit->unit == PIPE_SHADER_VERTEX) {
+ else if (emit->unit == PIPE_SHADER_VERTEX) {
if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
return TRUE;
+ if (emit->key.vkey.adjust_attrib_range)
+ return TRUE;
}
if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
@@ -3705,6 +3801,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
return FALSE;
}
}
+ else {
+ assert(emit->unit == PIPE_SHADER_VERTEX);
+ if (emit->key.vkey.adjust_attrib_range) {
+ if (!emit_adjusted_vertex_attribs(emit))
+ return FALSE;
+ }
+ }
+
return TRUE;
}