summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r600/r600_hw_context.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/r600_hw_context.c')
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c335
1 files changed, 335 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 52e0be7d5e2..1dba96642aa 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -943,6 +943,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
/* Count in queries_suspend. */
num_dw += ctx->num_cs_dw_queries_suspend;
+ /* Count in streamout_end at the end of CS. */
+ num_dw += ctx->num_cs_dw_streamout_end;
+
/* Count in render_condition(NULL) at the end of CS. */
if (ctx->predicate_drawing) {
num_dw += 3;
@@ -1471,6 +1474,12 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
r600_context_block_resource_emit_dirty(ctx, dirty_block);
}
+ /* Enable stream out if needed. */
+ if (ctx->streamout_start) {
+ r600_context_streamout_begin(ctx);
+ ctx->streamout_start = FALSE;
+ }
+
/* draw packet */
pm4 = &ctx->pm4[ctx->pm4_cdwords];
@@ -1503,6 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
{
struct r600_block *enable_block = NULL;
bool queries_suspended = false;
+ bool streamout_suspended = false;
if (ctx->pm4_cdwords == ctx->init_dwords)
return;
@@ -1513,6 +1523,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
queries_suspended = true;
}
+ if (ctx->num_cs_dw_streamout_end) {
+ r600_context_streamout_end(ctx);
+ streamout_suspended = true;
+ }
+
if (ctx->screen->chip_class >= EVERGREEN)
evergreen_context_flush_dest_caches(ctx);
else
@@ -1542,6 +1557,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
r600_init_cs(ctx);
+ if (streamout_suspended) {
+ ctx->streamout_start = TRUE;
+ ctx->streamout_append_bitmask = ~0;
+ }
+
/* resume queries */
if (queries_suspended) {
r600_context_queries_resume(ctx);
@@ -1636,6 +1656,44 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
}
break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ /* SAMPLE_STREAMOUTSTATS stores this structure:
+ * {
+ * u64 NumPrimitivesWritten;
+ * u64 PrimitiveStorageNeeded;
+ * }
+ * We only need NumPrimitivesWritten here. */
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 2, 6, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ /* Here we read PrimitiveStorageNeeded. */
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ while (results_base != query->results_end) {
+ query->result.so.num_primitives_written +=
+ r600_query_read_result(map + results_base, 2, 6, true);
+ query->result.so.primitives_storage_needed +=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ while (results_base != query->results_end) {
+ query->result.b = query->result.b ||
+ r600_query_read_result(map + results_base, 2, 6, true) !=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0;
+ }
+ break;
default:
assert(0);
}
@@ -1679,6 +1737,15 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
break;
case PIPE_QUERY_TIME_ELAPSED:
break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE);
+ results = (u32*)((char*)results + query->results_end);
+ memset(results, 0, query->result_size);
+ ctx->ws->buffer_unmap(query->buffer->buf);
+ break;
default:
assert(0);
}
@@ -1692,6 +1759,15 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ break;
case PIPE_QUERY_TIME_ELAPSED:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@@ -1720,6 +1796,15 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
+ ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ break;
case PIPE_QUERY_TIME_ELAPSED:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@@ -1798,6 +1883,14 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
query->result_size = 16;
query->num_cs_dw = 8;
break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32;
+ query->num_cs_dw = 6;
+ break;
default:
assert(0);
FREE(query);
@@ -1832,20 +1925,28 @@ boolean r600_context_query_result(struct r600_context *ctx,
{
boolean *result_b = (boolean*)vresult;
uint64_t *result_u64 = (uint64_t*)vresult;
+ struct pipe_query_data_so_statistics *result_so =
+ (struct pipe_query_data_so_statistics*)vresult;
if (!r600_query_result(ctx, query, wait))
return FALSE;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
*result_u64 = query->result.u64;
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
*result_b = query->result.b;
break;
case PIPE_QUERY_TIME_ELAPSED:
*result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
break;
+ case PIPE_QUERY_SO_STATISTICS:
+ *result_so = query->result.so;
+ break;
default:
assert(0);
}
@@ -1872,3 +1973,237 @@ void r600_context_queries_resume(struct r600_context *ctx)
r600_query_begin(ctx, query);
}
}
+
+static void r600_flush_vgt_streamout(struct r600_context *ctx)
+{
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_008490_CP_STRMOUT_CNTL - R600_CONFIG_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
+ ctx->pm4[ctx->pm4_cdwords++] = R_008490_CP_STRMOUT_CNTL >> 2; /* register */
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = S_008490_OFFSET_UPDATE_DONE(1); /* reference value */
+ ctx->pm4[ctx->pm4_cdwords++] = S_008490_OFFSET_UPDATE_DONE(1); /* mask */
+ ctx->pm4[ctx->pm4_cdwords++] = 4; /* poll interval */
+}
+
+static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
+{
+ if (buffer_enable_bit) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = S_028AB0_STREAMOUT(1);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028B20_VGT_STRMOUT_BUFFER_EN - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = buffer_enable_bit;
+ } else {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = S_028AB0_STREAMOUT(0);
+ }
+}
+
+void r600_context_streamout_begin(struct r600_context *ctx)
+{
+ struct r600_so_target **t = ctx->so_targets;
+ unsigned *strides = ctx->vs_shader_so_strides;
+ unsigned buffer_en, i, update_flags = 0;
+
+ buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
+ (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
+ (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
+ (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
+
+ ctx->num_cs_dw_streamout_end =
+ 12 + /* flush_vgt_streamout */
+ util_bitcount(buffer_en) * 8 +
+ 8;
+
+ r600_need_cs_space(ctx,
+ 12 + /* flush_vgt_streamout */
+ 6 + /* enables */
+ util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
+ util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
+ (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770 ? 2 : 0) +
+ ctx->num_cs_dw_streamout_end, TRUE);
+
+ if (ctx->screen->chip_class >= EVERGREEN) {
+ evergreen_flush_vgt_streamout(ctx);
+ evergreen_set_streamout_enable(ctx, buffer_en);
+ } else {
+ r600_flush_vgt_streamout(ctx);
+ r600_set_streamout_enable(ctx, buffer_en);
+ }
+
+ for (i = 0; i < ctx->num_so_targets; i++) {
+ if (t[i]) {
+ t[i]->stride = strides[i];
+ t[i]->so_index = i;
+
+ update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
+ 16*i - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset +
+ t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
+ ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* BUFFER_BASE */
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] =
+ r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
+ RADEON_USAGE_WRITE);
+
+ if (ctx->streamout_append_bitmask & (1 << i)) {
+ /* Append. */
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address lo */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address hi */
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] =
+ r600_context_bo_reloc(ctx, t[i]->filled_size,
+ RADEON_USAGE_READ);
+ } else {
+ /* Start from the beginning. */
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ }
+ }
+ }
+
+ if (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = update_flags;
+ }
+}
+
+void r600_context_streamout_end(struct r600_context *ctx)
+{
+ struct r600_so_target **t = ctx->so_targets;
+ unsigned i, flush_flags = 0;
+
+ if (ctx->screen->chip_class >= EVERGREEN) {
+ evergreen_flush_vgt_streamout(ctx);
+ } else {
+ r600_flush_vgt_streamout(ctx);
+ }
+
+ for (i = 0; i < ctx->num_so_targets; i++) {
+ if (t[i]) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* dst address lo */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* dst address hi */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] =
+ r600_context_bo_reloc(ctx, t[i]->filled_size,
+ RADEON_USAGE_WRITE);
+
+ flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
+ }
+ }
+
+ if (ctx->screen->chip_class >= EVERGREEN) {
+ evergreen_set_streamout_enable(ctx, 0);
+ } else {
+ r600_set_streamout_enable(ctx, 0);
+ }
+
+ if (ctx->screen->family < CHIP_RV770) {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
+ } else {
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = flush_flags; /* CP_COHER_CNTL */
+ ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* CP_COHER_SIZE */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* CP_COHER_BASE */
+ ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; /* POLL_INTERVAL */
+ }
+
+ ctx->num_cs_dw_streamout_end = 0;
+
+ /* XXX print some debug info */
+ for (i = 0; i < ctx->num_so_targets; i++) {
+ if (!t[i])
+ continue;
+
+ uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->buf, ctx->cs, RADEON_USAGE_READ);
+ printf("FILLED_SIZE%i: %u\n", i, *ptr);
+ ctx->ws->buffer_unmap(t[i]->filled_size->buf);
+ }
+}
+
+void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
+{
+ r600_need_cs_space(ctx, 14 + 21, TRUE);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2;
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address lo */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* src address hi */
+ ctx->pm4[ctx->pm4_cdwords++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, t->filled_size,
+ RADEON_USAGE_READ);
+
+#if 0 /* I have not found this useful yet. */
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_REG | COPY_DW_DST_IS_REG;
+ ctx->pm4[ctx->pm4_cdwords++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* src register */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+ ctx->pm4[ctx->pm4_cdwords++] = R_0085F4_CP_COHER_SIZE >> 2; /* dst register */
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* unused */
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_0085F0_CP_COHER_CNTL - R600_CONFIG_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = S_0085F0_SO0_DEST_BASE_ENA(1) << t->so_index;
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = (R_0085F8_CP_COHER_BASE - R600_CONFIG_REG_OFFSET) >> 2;
+ ctx->pm4[ctx->pm4_cdwords++] = t->b.buffer_offset >> 2;
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, (struct r600_resource*)t->b.buffer,
+ RADEON_USAGE_WRITE);
+
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
+ ctx->pm4[ctx->pm4_cdwords++] = R_0085FC_CP_COHER_STATUS >> 2; /* register */
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = 0; /* reference value */
+ ctx->pm4[ctx->pm4_cdwords++] = 0xffffffff; /* mask */
+ ctx->pm4[ctx->pm4_cdwords++] = 4; /* poll interval */
+#endif
+}