summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c | 2
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c | 63
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c | 10
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c | 1
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h | 33
-rw-r--r-- src/gallium/drivers/r600/r600_state.c | 53
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c | 36
7 files changed, 116 insertions, 82 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index acf91ba43f3..35333120654 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -583,7 +583,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
/* since all required registers are initialised in the
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
- r600_init_command_buffer(cb, 256, EMIT_EARLY);
+ r600_init_command_buffer(ctx, cb, 1, 256);
cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
switch (ctx->family) {
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index bda8ed5dc2d..9a5183ec6be 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2161,27 +2161,50 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
void evergreen_init_state_functions(struct r600_context *rctx)
{
- r600_init_atom(&rctx->cb_misc_state.atom, evergreen_emit_cb_misc_state, 0, 0);
- r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
- r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 7, 0);
- r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
- r600_init_atom(&rctx->vertex_buffer_state.atom, evergreen_fs_emit_vertex_buffers, 0, 0);
- r600_init_atom(&rctx->cs_vertex_buffer_state.atom, evergreen_cs_emit_vertex_buffers, 0, 0);
- r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffers, 0, 0);
- r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffers, 0, 0);
- r600_init_atom(&rctx->vs_samplers.views.atom, evergreen_emit_vs_sampler_views, 0, 0);
- r600_init_atom(&rctx->ps_samplers.views.atom, evergreen_emit_ps_sampler_views, 0, 0);
- r600_init_atom(&rctx->cs_shader_state.atom, evergreen_emit_cs_shader, 0, 0);
- r600_init_atom(&rctx->vs_samplers.atom_sampler, evergreen_emit_vs_sampler, 0, 0);
- r600_init_atom(&rctx->ps_samplers.atom_sampler, evergreen_emit_ps_sampler, 0, 0);
-
- if (rctx->chip_class == EVERGREEN)
- r600_init_atom(&rctx->sample_mask.atom, evergreen_emit_sample_mask, 3, 0);
- else
- r600_init_atom(&rctx->sample_mask.atom, cayman_emit_sample_mask, 4, 0);
+ unsigned id = 4;
+
+ /* !!!
+ * To avoid GPU lockups, registers must be emitted in a specific order
+ * (no kidding ...). The order below is important and has been
+ * partially inferred from analyzing the fglrx command stream.
+ *
+ * Don't reorder atoms without carefully checking the effect (GPU lockup
+ * or piglit regression).
+ * !!!
+ */
+
+ /* shader const */
+ r600_init_atom(rctx, &rctx->vs_constbuf_state.atom, id++, evergreen_emit_vs_constant_buffers, 0);
+ r600_init_atom(rctx, &rctx->ps_constbuf_state.atom, id++, evergreen_emit_ps_constant_buffers, 0);
+ /* shader program */
+ r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0);
+ /* sampler */
+ r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, evergreen_emit_vs_sampler, 0);
+ r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, evergreen_emit_ps_sampler, 0);
+ /* resources */
+ r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
+ r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
+ r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, evergreen_emit_vs_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+
+ if (rctx->chip_class == EVERGREEN) {
+ r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3);
+ } else {
+ r600_init_atom(rctx, &rctx->sample_mask.atom, id++, cayman_emit_sample_mask, 4);
+ }
rctx->sample_mask.sample_mask = ~0;
r600_atom_dirty(rctx, &rctx->sample_mask.atom);
+ r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 0);
+ r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
+
+ r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6);
+ r600_atom_dirty(rctx, &rctx->alphatest_state.atom);
+
+ r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 7);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+
+
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
rctx->context.create_fs_state = r600_create_shader_state_ps;
@@ -2230,7 +2253,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
{
struct r600_command_buffer *cb = &rctx->start_cs_cmd;
- r600_init_command_buffer(cb, 256, EMIT_EARLY);
+ r600_init_command_buffer(rctx, cb, 0, 256);
/* This must be first. */
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -2608,7 +2631,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
return;
}
- r600_init_command_buffer(cb, 256, EMIT_EARLY);
+ r600_init_command_buffer(rctx, cb, 0, 256);
/* This must be first. */
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 7b7b6b1ffac..0ec13e5bade 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -633,15 +633,17 @@ out_err:
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
- struct r600_atom *state;
-
/* The number of dwords we already used in the CS so far. */
num_dw += ctx->cs->cdw;
if (count_draw_in) {
+ unsigned i;
+
/* The number of dwords all the dirty states would take. */
- LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) {
- num_dw += state->num_dw;
+ for (i = 0; i < R600_MAX_ATOM; i++) {
+ if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
+ num_dw += ctx->atoms[i]->num_dw;
+ }
}
num_dw += ctx->pm4_dirty_cdwords;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 286c676f85b..9e6c28d523b 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -224,7 +224,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
- LIST_INITHEAD(&rctx->dirty_states);
LIST_INITHEAD(&rctx->active_timer_queries);
LIST_INITHEAD(&rctx->active_nontimer_queries);
LIST_INITHEAD(&rctx->dirty);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 721334d1e84..ff720e95cfe 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,6 +35,8 @@
#include "r600_resource.h"
#include "evergreen_compute.h"
+#define R600_MAX_ATOM 17
+
#define R600_MAX_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -44,23 +46,14 @@
#define R600_BIG_ENDIAN 0
#endif
-enum r600_atom_flags {
- /* When set, atoms are added at the beginning of the dirty list
- * instead of the end. */
- EMIT_EARLY = (1 << 0)
-};
-
/* This encapsulates a state or an operation which can emitted into the GPU
* command stream. It's not limited to states only, it can be used for anything
* that wants to write commands into the CS (e.g. cache flushes). */
struct r600_atom {
void (*emit)(struct r600_context *ctx, struct r600_atom *state);
-
+ unsigned id;
unsigned num_dw;
- enum r600_atom_flags flags;
bool dirty;
-
- struct list_head head;
};
/* This is an atom containing GPU commands that never change.
@@ -372,8 +365,8 @@ struct r600_context {
unsigned default_ps_gprs, default_vs_gprs;
/* States based on r600_atom. */
- struct list_head dirty_states;
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
+ struct r600_atom *atoms[R600_MAX_ATOM];
/** Compute specific registers initializations. The start_cs_cmd atom
* must be emitted before start_compute_cs_cmd. */
struct r600_command_buffer start_compute_cs_cmd;
@@ -464,20 +457,11 @@ static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *a
{
atom->emit(rctx, atom);
atom->dirty = false;
- if (atom->head.next && atom->head.prev)
- LIST_DELINIT(&atom->head);
}
static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *state)
{
- if (!state->dirty) {
- if (state->flags & EMIT_EARLY) {
- LIST_ADD(&state->head, &rctx->dirty_states);
- } else {
- LIST_ADDTAIL(&state->head, &rctx->dirty_states);
- }
- state->dirty = true;
- }
+ state->dirty = true;
}
/* evergreen_state.c */
@@ -587,9 +571,10 @@ void r600_translate_index_buffer(struct r600_context *r600,
unsigned count);
/* r600_state_common.c */
-void r600_init_atom(struct r600_atom *atom,
+void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom);
+void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
- unsigned num_dw, enum r600_atom_flags flags);
+ unsigned num_dw);
void r600_init_common_atoms(struct r600_context *rctx);
unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
void r600_texture_barrier(struct pipe_context *ctx);
@@ -772,7 +757,7 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
r600_store_value(cb, value);
}
-void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw, enum r600_atom_flags flags);
+void r600_init_command_buffer(struct r600_context *rctx, struct r600_command_buffer *cb, unsigned id, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
/*
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 53bbdd9da7d..ccafdc6733b 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2031,27 +2031,48 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
void r600_init_state_functions(struct r600_context *rctx)
{
- r600_init_atom(&rctx->seamless_cube_map.atom, r600_emit_seamless_cube_map, 3, 0);
- r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
- r600_init_atom(&rctx->cb_misc_state.atom, r600_emit_cb_misc_state, 0, 0);
- r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
- r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0);
- r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
- r600_init_atom(&rctx->vertex_buffer_state.atom, r600_emit_vertex_buffers, 0, 0);
- r600_init_atom(&rctx->vs_constbuf_state.atom, r600_emit_vs_constant_buffers, 0, 0);
- r600_init_atom(&rctx->ps_constbuf_state.atom, r600_emit_ps_constant_buffers, 0, 0);
- r600_init_atom(&rctx->vs_samplers.views.atom, r600_emit_vs_sampler_views, 0, 0);
- r600_init_atom(&rctx->ps_samplers.views.atom, r600_emit_ps_sampler_views, 0, 0);
+ unsigned id = 4;
+
+ /* !!!
+ * To avoid GPU lockups, registers must be emitted in a specific order
+ * (no kidding ...). The order below is important and has been
+ * partially inferred from analyzing the fglrx command stream.
+ *
+ * Don't reorder atoms without carefully checking the effect (GPU lockup
+ * or piglit regression).
+ * !!!
+ */
+
+ /* shader const */
+ r600_init_atom(rctx, &rctx->vs_constbuf_state.atom, id++, r600_emit_vs_constant_buffers, 0);
+ r600_init_atom(rctx, &rctx->ps_constbuf_state.atom, id++, r600_emit_ps_constant_buffers, 0);
+
/* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change
- * does not take effect
+ * does not take effect (TA_CNTL_AUX is emitted by r600_emit_seamless_cube_map)
*/
- r600_init_atom(&rctx->vs_samplers.atom_sampler, r600_emit_vs_sampler, 0, EMIT_EARLY);
- r600_init_atom(&rctx->ps_samplers.atom_sampler, r600_emit_ps_sampler, 0, EMIT_EARLY);
+ r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, r600_emit_vs_sampler, 0);
+ r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, r600_emit_ps_sampler, 0);
+ /* resource */
+ r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, r600_emit_vs_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, r600_emit_ps_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, r600_emit_vertex_buffers, 0);
+
+ r600_init_atom(rctx, &rctx->seamless_cube_map.atom, id++, r600_emit_seamless_cube_map, 3);
+ r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
- r600_init_atom(&rctx->sample_mask.atom, r600_emit_sample_mask, 3, 0);
+ r600_init_atom(rctx, &rctx->sample_mask.atom, id++, r600_emit_sample_mask, 3);
rctx->sample_mask.sample_mask = ~0;
r600_atom_dirty(rctx, &rctx->sample_mask.atom);
+ r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, r600_emit_cb_misc_state, 0);
+ r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
+
+ r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6);
+ r600_atom_dirty(rctx, &rctx->alphatest_state.atom);
+
+ r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 4);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+
rctx->context.create_blend_state = r600_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
rctx->context.create_fs_state = r600_create_shader_state_ps;
@@ -2157,7 +2178,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
struct r600_command_buffer *cb = &rctx->start_cs_cmd;
uint32_t tmp;
- r600_init_command_buffer(cb, 256, EMIT_EARLY);
+ r600_init_command_buffer(rctx, cb, 0, 256);
/* R6xx requires this packet at the start of each command buffer */
if (rctx->chip_class == R600) {
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 26af6f609a5..e67eba8921c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -44,11 +44,9 @@ static void r600_emit_command_buffer(struct r600_context *rctx, struct r600_atom
cs->cdw += cb->atom.num_dw;
}
-void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw, enum r600_atom_flags flags)
+void r600_init_command_buffer(struct r600_context *rctx, struct r600_command_buffer *cb, unsigned id, unsigned num_dw)
{
- cb->atom.emit = r600_emit_command_buffer;
- cb->atom.num_dw = 0;
- cb->atom.flags = flags;
+ r600_init_atom(rctx, &cb->atom, id, r600_emit_command_buffer, 0);
cb->buf = CALLOC(1, 4 * num_dw);
cb->max_num_dw = num_dw;
}
@@ -79,16 +77,22 @@ static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
}
-void r600_init_atom(struct r600_atom *atom,
+void r600_init_atom(struct r600_context *rctx,
+ struct r600_atom *atom,
+ unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
- unsigned num_dw, enum r600_atom_flags flags)
+ unsigned num_dw)
{
+ assert(id < R600_MAX_ATOM);
+ assert(rctx->atoms[id] == NULL);
+ rctx->atoms[id] = atom;
+ atom->id = id;
atom->emit = emit;
atom->num_dw = num_dw;
- atom->flags = flags;
+ atom->dirty = false;
}
-static void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
+void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
@@ -106,10 +110,8 @@ static void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_ato
void r600_init_common_atoms(struct r600_context *rctx)
{
- r600_init_atom(&rctx->surface_sync_cmd.atom, r600_emit_surface_sync, 5, EMIT_EARLY);
- r600_init_atom(&rctx->r6xx_flush_and_inv_cmd, r600_emit_r6xx_flush_and_inv, 2, EMIT_EARLY);
- r600_init_atom(&rctx->alphatest_state.atom, r600_emit_alphatest_state, 6, 0);
- r600_atom_dirty(rctx, &rctx->alphatest_state.atom);
+ r600_init_atom(rctx, &rctx->r6xx_flush_and_inv_cmd, 2, r600_emit_r6xx_flush_and_inv, 2);
+ r600_init_atom(rctx, &rctx->surface_sync_cmd.atom, 3, r600_emit_surface_sync, 5);
}
unsigned r600_get_cb_flush_flags(struct r600_context *rctx)
@@ -1127,9 +1129,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
- unsigned prim, ls_mask = 0;
+ unsigned prim, ls_mask = 0, i;
struct r600_block *dirty_block = NULL, *next_block = NULL;
- struct r600_atom *state = NULL, *next_state = NULL;
struct radeon_winsys_cs *cs = rctx->cs;
uint64_t va;
uint8_t *ptr;
@@ -1221,8 +1222,11 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
/* Emit states (the function expects that we emit at most 17 dwords here). */
r600_need_cs_space(rctx, 0, TRUE);
- LIST_FOR_EACH_ENTRY_SAFE(state, next_state, &rctx->dirty_states, head) {
- r600_emit_atom(rctx, state);
+ for (i = 0; i < R600_MAX_ATOM; i++) {
+ if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
+ continue;
+ }
+ r600_emit_atom(rctx, rctx->atoms[i]);
}
LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &rctx->dirty,list) {
r600_context_block_emit_dirty(rctx, dirty_block, 0 /* pkt_flags */);