summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerome Glisse <jglisse@redhat.com>2012-11-01 16:09:40 -0400
committerJerome Glisse <jglisse@redhat.com>2012-12-20 18:23:31 -0500
commit24b1206ab2dcd506aaac3ef656aebc8bc20cd27a (patch)
tree33102906acd5f0f9c749db048aa261271dc12e3b
parentcf5632094ba0c19d570ea47025cf6da75ef8457a (diff)
r600g: rework flushing and synchronization pattern v7
This brings r600g almost in line with the closed source driver when it comes to flushing and synchronization pattern. v2-v4: history lost somewhere in outer space. v5: Fix compute size of flushing, use define for flags, update worst case cs size requirement for flush, treat rs780 and newer as r7xx when it comes to streamout. v6: Fix num dw computation for framebuffer state, remove dead code, use define instead of hardcoded value. v7: Remove dead code. Signed-off-by: Jerome Glisse <jglisse@redhat.com>
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c8
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.c4
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c4
-rw-r--r--src/gallium/drivers/r600/r600.h16
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c178
-rw-r--r--src/gallium/drivers/r600/r600_hw_context_priv.h2
-rw-r--r--src/gallium/drivers/r600/r600_state.c20
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c19
8 files changed, 89 insertions, 162 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 66b0cc6d92c..ea75d805e06 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
98 98
99 /* The vertex instructions in the compute shaders use the texture cache, 99 /* The vertex instructions in the compute shaders use the texture cache,
100 * so we need to invalidate it. */ 100 * so we need to invalidate it. */
101 rctx->flags |= R600_CONTEXT_TEX_FLUSH; 101 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
102 state->enabled_mask |= 1 << vb_index; 102 state->enabled_mask |= 1 << vb_index;
103 state->dirty_mask |= 1 << vb_index; 103 state->dirty_mask |= 1 << vb_index;
104 state->atom.dirty = true; 104 state->atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
329 */ 329 */
330 r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); 330 r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
331 331
332 ctx->flags |= R600_CONTEXT_CB_FLUSH; 332 ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
333 r600_flush_emit(ctx); 333 r600_flush_emit(ctx);
334 334
335 /* Emit colorbuffers. */ 335 /* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
409 409
410 /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff 410 /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
411 */ 411 */
412 ctx->flags |= R600_CONTEXT_CB_FLUSH; 412 ctx->flags |= R600_CONTEXT_GPU_FLUSH;
413 r600_flush_emit(ctx); 413 r600_flush_emit(ctx);
414 414
415#if 0 415#if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
468 r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, 468 r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo,
469 RADEON_USAGE_READ)); 469 RADEON_USAGE_READ));
470 470
471 rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; 471 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
472} 472}
473 473
474static void evergreen_launch_grid( 474static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index f7aebf22374..94f556f38b3 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -545,7 +545,7 @@ void evergreen_set_tex_resource(
545 util_format_get_blockwidth(tmp->resource.b.b.format) * 545 util_format_get_blockwidth(tmp->resource.b.b.format) *
546 view->base.texture->width0*height*depth; 546 view->base.texture->width0*height*depth;
547 547
548 pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; 548 pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH;
549 549
550 evergreen_emit_force_reloc(res); 550 evergreen_emit_force_reloc(res);
551 evergreen_emit_force_reloc(res); 551 evergreen_emit_force_reloc(res);
@@ -604,7 +604,7 @@ void evergreen_set_const_cache(
604 res->usage = RADEON_USAGE_READ; 604 res->usage = RADEON_USAGE_READ;
605 res->coher_bo_size = size; 605 res->coher_bo_size = size;
606 606
607 pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; 607 pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH;
608} 608}
609 609
610struct r600_resource* r600_compute_buffer_alloc_vram( 610struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 996c1b48b9a..58964c47675 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
1557 uint32_t i, log_samples; 1557 uint32_t i, log_samples;
1558 1558
1559 if (rctx->framebuffer.state.nr_cbufs) { 1559 if (rctx->framebuffer.state.nr_cbufs) {
1560 rctx->flags |= R600_CONTEXT_CB_FLUSH; 1560 rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
1561 1561
1562 if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { 1562 if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
1563 rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; 1563 rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
1564 } 1564 }
1565 } 1565 }
1566 if (rctx->framebuffer.state.zsbuf) { 1566 if (rctx->framebuffer.state.zsbuf) {
1567 rctx->flags |= R600_CONTEXT_DB_FLUSH; 1567 rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
1568 } 1568 }
1569 1569
1570 util_copy_framebuffer_state(&rctx->framebuffer.state, state); 1570 util_copy_framebuffer_state(&rctx->framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index d15cd5256fa..c351982aa27 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -182,17 +182,11 @@ struct r600_so_target {
182 unsigned so_index; 182 unsigned so_index;
183}; 183};
184 184
185#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0) 185#define R600_CONTEXT_GPU_FLUSH (1 << 0)
186#define R600_CONTEXT_CB_FLUSH (1 << 1) 186#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1)
187#define R600_CONTEXT_DB_FLUSH (1 << 2) 187#define R600_CONTEXT_WAIT_IDLE (1 << 2)
188#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3) 188#define R600_CONTEXT_FLUSH_AND_INV (1 << 3)
189#define R600_CONTEXT_TEX_FLUSH (1 << 4) 189#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4)
190#define R600_CONTEXT_VTX_FLUSH (1 << 5)
191#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6)
192#define R600_CONTEXT_WAIT_IDLE (1 << 7)
193#define R600_CONTEXT_FLUSH_AND_INV (1 << 8)
194#define R600_CONTEXT_HTILE_ERRATA (1 << 9)
195#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 10)
196 190
197struct r600_context; 191struct r600_context;
198struct r600_screen; 192struct r600_screen;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index c7a357e15ed..8a22b885d2a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -424,7 +424,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
424 LIST_ADDTAIL(&block->list,&ctx->dirty); 424 LIST_ADDTAIL(&block->list,&ctx->dirty);
425 425
426 if (block->flags & REG_FLAG_FLUSH_CHANGE) { 426 if (block->flags & REG_FLAG_FLUSH_CHANGE) {
427 ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; 427 ctx->flags |= R600_CONTEXT_WAIT_IDLE;
428 } 428 }
429 } 429 }
430} 430}
@@ -595,16 +595,13 @@ out:
595void r600_flush_emit(struct r600_context *rctx) 595void r600_flush_emit(struct r600_context *rctx)
596{ 596{
597 struct radeon_winsys_cs *cs = rctx->cs; 597 struct radeon_winsys_cs *cs = rctx->cs;
598 unsigned cp_coher_cntl = 0;
599 unsigned emit_flush = 0;
598 600
599 if (!rctx->flags) { 601 if (!rctx->flags) {
600 return; 602 return;
601 } 603 }
602 604
603 if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
604 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
605 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
606 }
607
608 if (rctx->chip_class >= R700 && 605 if (rctx->chip_class >= R700 &&
609 (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) { 606 (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) {
610 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 607 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
@@ -614,110 +611,54 @@ void r600_flush_emit(struct r600_context *rctx)
614 if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { 611 if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
615 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 612 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
616 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); 613 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
617 614 if (rctx->chip_class >= EVERGREEN) {
618 /* DB flushes are special due to errata with hyperz, we need to 615 cp_coher_cntl = S_0085F0_CB0_DEST_BASE_ENA(1) |
619 * insert a no-op, so that the cache has time to really flush. 616 S_0085F0_CB1_DEST_BASE_ENA(1) |
620 */ 617 S_0085F0_CB2_DEST_BASE_ENA(1) |
621 if (rctx->chip_class <= R700 && 618 S_0085F0_CB3_DEST_BASE_ENA(1) |
622 rctx->flags & R600_CONTEXT_HTILE_ERRATA) { 619 S_0085F0_CB4_DEST_BASE_ENA(1) |
623 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); 620 S_0085F0_CB5_DEST_BASE_ENA(1) |
624 cs->buf[cs->cdw++] = 0xdeadcafe; 621 S_0085F0_CB6_DEST_BASE_ENA(1) |
625 cs->buf[cs->cdw++] = 0xdeadcafe; 622 S_0085F0_CB7_DEST_BASE_ENA(1) |
626 cs->buf[cs->cdw++] = 0xdeadcafe; 623 S_0085F0_CB8_DEST_BASE_ENA(1) |
627 cs->buf[cs->cdw++] = 0xdeadcafe; 624 S_0085F0_CB9_DEST_BASE_ENA(1) |
628 cs->buf[cs->cdw++] = 0xdeadcafe; 625 S_0085F0_CB10_DEST_BASE_ENA(1) |
629 cs->buf[cs->cdw++] = 0xdeadcafe; 626 S_0085F0_CB11_DEST_BASE_ENA(1) |
630 cs->buf[cs->cdw++] = 0xdeadcafe; 627 S_0085F0_DB_DEST_BASE_ENA(1) |
631 cs->buf[cs->cdw++] = 0xdeadcafe; 628 S_0085F0_TC_ACTION_ENA(1) |
632 cs->buf[cs->cdw++] = 0xdeadcafe; 629 S_0085F0_CB_ACTION_ENA(1) |
633 cs->buf[cs->cdw++] = 0xdeadcafe; 630 S_0085F0_DB_ACTION_ENA(1) |
634 cs->buf[cs->cdw++] = 0xdeadcafe; 631 S_0085F0_SH_ACTION_ENA(1) |
635 cs->buf[cs->cdw++] = 0xdeadcafe; 632 S_0085F0_SMX_ACTION_ENA(1) |
636 cs->buf[cs->cdw++] = 0xdeadcafe; 633 (1 << 20); /* unknown bit */
637 cs->buf[cs->cdw++] = 0xdeadcafe; 634 } else {
638 cs->buf[cs->cdw++] = 0xdeadcafe; 635 cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) |
639 cs->buf[cs->cdw++] = 0xdeadcafe; 636 S_0085F0_SH_ACTION_ENA(1) |
640 cs->buf[cs->cdw++] = 0xdeadcafe; 637 S_0085F0_VC_ACTION_ENA(1) |
641 cs->buf[cs->cdw++] = 0xdeadcafe; 638 S_0085F0_TC_ACTION_ENA(1) |
642 cs->buf[cs->cdw++] = 0xdeadcafe; 639 (1 << 20); /* unknown bit */
643 cs->buf[cs->cdw++] = 0xdeadcafe;
644 cs->buf[cs->cdw++] = 0xdeadcafe;
645 cs->buf[cs->cdw++] = 0xdeadcafe;
646 cs->buf[cs->cdw++] = 0xdeadcafe;
647 cs->buf[cs->cdw++] = 0xdeadcafe;
648 cs->buf[cs->cdw++] = 0xdeadcafe;
649 cs->buf[cs->cdw++] = 0xdeadcafe;
650 cs->buf[cs->cdw++] = 0xdeadcafe;
651 cs->buf[cs->cdw++] = 0xdeadcafe;
652 cs->buf[cs->cdw++] = 0xdeadcafe;
653 cs->buf[cs->cdw++] = 0xdeadcafe;
654 cs->buf[cs->cdw++] = 0xdeadcafe;
655 cs->buf[cs->cdw++] = 0xdeadcafe;
656 } 640 }
657 } 641 }
658 642
659 if (rctx->flags & (R600_CONTEXT_CB_FLUSH | 643 if (rctx->flags & R600_CONTEXT_GPU_FLUSH) {
660 R600_CONTEXT_DB_FLUSH | 644 cp_coher_cntl |= S_0085F0_VC_ACTION_ENA(1) |
661 R600_CONTEXT_SHADERCONST_FLUSH | 645 S_0085F0_TC_ACTION_ENA(1) |
662 R600_CONTEXT_TEX_FLUSH | 646 (1 << 20); /* unknown bit */
663 R600_CONTEXT_VTX_FLUSH | 647 emit_flush = 1;
664 R600_CONTEXT_STREAMOUT_FLUSH)) { 648 }
665 /* anything left (cb, vtx, shader, streamout) can be flushed
666 * using the surface sync packet
667 */
668 unsigned flags = 0;
669
670 if (rctx->flags & R600_CONTEXT_CB_FLUSH) {
671 flags |= S_0085F0_CB_ACTION_ENA(1) |
672 S_0085F0_CB0_DEST_BASE_ENA(1) |
673 S_0085F0_CB1_DEST_BASE_ENA(1) |
674 S_0085F0_CB2_DEST_BASE_ENA(1) |
675 S_0085F0_CB3_DEST_BASE_ENA(1) |
676 S_0085F0_CB4_DEST_BASE_ENA(1) |
677 S_0085F0_CB5_DEST_BASE_ENA(1) |
678 S_0085F0_CB6_DEST_BASE_ENA(1) |
679 S_0085F0_CB7_DEST_BASE_ENA(1);
680
681 if (rctx->chip_class >= EVERGREEN) {
682 flags |= S_0085F0_CB8_DEST_BASE_ENA(1) |
683 S_0085F0_CB9_DEST_BASE_ENA(1) |
684 S_0085F0_CB10_DEST_BASE_ENA(1) |
685 S_0085F0_CB11_DEST_BASE_ENA(1);
686 }
687
688 /* RV670 errata
689 * (CB1_DEST_BASE_ENA is also required, which is
690 * included unconditionally above). */
691 if (rctx->family == CHIP_RV670 ||
692 rctx->family == CHIP_RS780 ||
693 rctx->family == CHIP_RS880) {
694 flags |= S_0085F0_DEST_BASE_0_ENA(1);
695 }
696 }
697
698 if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
699 flags |= S_0085F0_SO0_DEST_BASE_ENA(1) |
700 S_0085F0_SO1_DEST_BASE_ENA(1) |
701 S_0085F0_SO2_DEST_BASE_ENA(1) |
702 S_0085F0_SO3_DEST_BASE_ENA(1) |
703 S_0085F0_SMX_ACTION_ENA(1);
704
705 /* RV670 errata */
706 if (rctx->family == CHIP_RV670 ||
707 rctx->family == CHIP_RS780 ||
708 rctx->family == CHIP_RS880) {
709 flags |= S_0085F0_DEST_BASE_0_ENA(1);
710 }
711 }
712 649
713 flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) | 650 if (rctx->family >= CHIP_RV770 && rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
714 S_0085F0_DB_DEST_BASE_ENA(1): 0; 651 cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) |
715 flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0; 652 S_0085F0_SO1_DEST_BASE_ENA(1) |
716 flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0; 653 S_0085F0_SO2_DEST_BASE_ENA(1) |
717 flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0; 654 S_0085F0_SO3_DEST_BASE_ENA(1) |
655 S_0085F0_SMX_ACTION_ENA(1);
656 emit_flush = 1;
657 }
718 658
659 if (emit_flush) {
719 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 660 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
720 cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ 661 cs->buf[cs->cdw++] = cp_coher_cntl; /* CP_COHER_CNTL */
721 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ 662 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */
722 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ 663 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */
723 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ 664 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */
@@ -758,16 +699,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
758 ctx->streamout_suspended = true; 699 ctx->streamout_suspended = true;
759 } 700 }
760 701
761 /* partial flush is needed to avoid lockups on some chips with user fences */ 702 /* flush is needed to avoid lockups on some chips with user fences
762 ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; 703 * this will also flush the framebuffer cache
763 704 */
764 /* flush the framebuffer */ 705 ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
765 ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH;
766
767 /* R6xx errata */
768 if (ctx->chip_class == R600) {
769 ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
770 }
771 706
772 r600_flush_emit(ctx); 707 r600_flush_emit(ctx);
773 708
@@ -884,9 +819,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
884 va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); 819 va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
885 va = va + (offset << 2); 820 va = va + (offset << 2);
886 821
887 ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH; 822 r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
888 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
889 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
890 823
891 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 824 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
892 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 825 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@@ -1073,15 +1006,14 @@ void r600_context_streamout_end(struct r600_context *ctx)
1073 } 1006 }
1074 1007
1075 if (ctx->chip_class >= EVERGREEN) { 1008 if (ctx->chip_class >= EVERGREEN) {
1009 ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
1076 evergreen_set_streamout_enable(ctx, 0); 1010 evergreen_set_streamout_enable(ctx, 0);
1077 } else { 1011 } else {
1012 if (ctx->chip_class >= R700) {
1013 ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
1014 }
1078 r600_set_streamout_enable(ctx, 0); 1015 r600_set_streamout_enable(ctx, 0);
1079 } 1016 }
1080 ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; 1017 ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
1081
1082 /* R6xx errata */
1083 if (ctx->chip_class == R600) {
1084 ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
1085 }
1086 ctx->num_cs_dw_streamout_end = 0; 1018 ctx->num_cs_dw_streamout_end = 0;
1087} 1019}
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
index c59a9abf3f9..050c472fa8c 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -29,7 +29,7 @@
29#include "r600_pipe.h" 29#include "r600_pipe.h"
30 30
31/* the number of CS dwords for flushing and drawing */ 31/* the number of CS dwords for flushing and drawing */
32#define R600_MAX_FLUSH_CS_DWORDS 46 32#define R600_MAX_FLUSH_CS_DWORDS 12
33#define R600_MAX_DRAW_CS_DWORDS 34 33#define R600_MAX_DRAW_CS_DWORDS 34
34 34
35/* these flags are used in register flags and added into block flags */ 35/* these flags are used in register flags and added into block flags */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 9bfae4f2299..f969808603b 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1452,7 +1452,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
1452 unsigned i; 1452 unsigned i;
1453 1453
1454 if (rctx->framebuffer.state.nr_cbufs) { 1454 if (rctx->framebuffer.state.nr_cbufs) {
1455 rctx->flags |= R600_CONTEXT_CB_FLUSH; 1455 rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
1456 1456
1457 if (rctx->chip_class >= R700 && 1457 if (rctx->chip_class >= R700 &&
1458 rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { 1458 rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
@@ -1460,11 +1460,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
1460 } 1460 }
1461 } 1461 }
1462 if (rctx->framebuffer.state.zsbuf) { 1462 if (rctx->framebuffer.state.zsbuf) {
1463 rctx->flags |= R600_CONTEXT_DB_FLUSH; 1463 rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
1464 }
1465 /* R6xx errata */
1466 if (rctx->chip_class == R600) {
1467 rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
1468 } 1464 }
1469 1465
1470 /* Set the new state. */ 1466 /* Set the new state. */
@@ -1558,7 +1554,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
1558 1554
1559 } 1555 }
1560 if (rctx->framebuffer.state.zsbuf) { 1556 if (rctx->framebuffer.state.zsbuf) {
1561 rctx->framebuffer.atom.num_dw += 16; 1557 rctx->framebuffer.atom.num_dw += 18;
1562 } else if (rctx->screen->info.drm_minor >= 18) { 1558 } else if (rctx->screen->info.drm_minor >= 18) {
1563 rctx->framebuffer.atom.num_dw += 3; 1559 rctx->framebuffer.atom.num_dw += 3;
1564 } 1560 }
@@ -1742,6 +1738,13 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
1742 sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs); 1738 sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs);
1743 } 1739 }
1744 1740
1741 /* SURFACE_BASE_UPDATE */
1742 if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) {
1743 r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
1744 r600_write_value(cs, sbu);
1745 sbu = 0;
1746 }
1747
1745 /* Zbuffer. */ 1748 /* Zbuffer. */
1746 if (state->zsbuf) { 1749 if (state->zsbuf) {
1747 struct r600_surface *surf = (struct r600_surface*)state->zsbuf; 1750 struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
@@ -1775,6 +1778,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
1775 if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) { 1778 if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) {
1776 r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); 1779 r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
1777 r600_write_value(cs, sbu); 1780 r600_write_value(cs, sbu);
1781 sbu = 0;
1778 } 1782 }
1779 1783
1780 /* Framebuffer dimensions. */ 1784 /* Framebuffer dimensions. */
@@ -2243,7 +2247,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
2243 if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { 2247 if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
2244 rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; 2248 rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
2245 rctx->config_state.atom.dirty = true; 2249 rctx->config_state.atom.dirty = true;
2246 rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; 2250 rctx->flags |= R600_CONTEXT_WAIT_IDLE;
2247 } 2251 }
2248 return true; 2252 return true;
2249} 2253}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b20f6550c1c..c69149bf0b6 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -88,12 +88,9 @@ static void r600_texture_barrier(struct pipe_context *ctx)
88{ 88{
89 struct r600_context *rctx = (struct r600_context *)ctx; 89 struct r600_context *rctx = (struct r600_context *)ctx;
90 90
91 rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH; 91 rctx->flags |= R600_CONTEXT_WAIT_IDLE;
92 92 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
93 /* R6xx errata */ 93 rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
94 if (rctx->chip_class == R600) {
95 rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
96 }
97} 94}
98 95
99static unsigned r600_conv_pipe_prim(unsigned prim) 96static unsigned r600_conv_pipe_prim(unsigned prim)
@@ -360,7 +357,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
360{ 357{
361 if (state->dirty_mask) { 358 if (state->dirty_mask) {
362 if (state->dirty_mask & state->has_bordercolor_mask) { 359 if (state->dirty_mask & state->has_bordercolor_mask) {
363 rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; 360 rctx->flags |= R600_CONTEXT_WAIT_IDLE;
364 } 361 }
365 state->atom.num_dw = 362 state->atom.num_dw =
366 util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + 363 util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
@@ -423,7 +420,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
423 seamless_cube_map != -1 && 420 seamless_cube_map != -1 &&
424 seamless_cube_map != rctx->seamless_cube_map.enabled) { 421 seamless_cube_map != rctx->seamless_cube_map.enabled) {
425 /* change in TA_CNTL_AUX need a pipeline flush */ 422 /* change in TA_CNTL_AUX need a pipeline flush */
426 rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; 423 rctx->flags |= R600_CONTEXT_WAIT_IDLE;
427 rctx->seamless_cube_map.enabled = seamless_cube_map; 424 rctx->seamless_cube_map.enabled = seamless_cube_map;
428 rctx->seamless_cube_map.atom.dirty = true; 425 rctx->seamless_cube_map.atom.dirty = true;
429 } 426 }
@@ -491,7 +488,7 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
491void r600_vertex_buffers_dirty(struct r600_context *rctx) 488void r600_vertex_buffers_dirty(struct r600_context *rctx)
492{ 489{
493 if (rctx->vertex_buffer_state.dirty_mask) { 490 if (rctx->vertex_buffer_state.dirty_mask) {
494 rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH; 491 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
495 rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * 492 rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
496 util_bitcount(rctx->vertex_buffer_state.dirty_mask); 493 util_bitcount(rctx->vertex_buffer_state.dirty_mask);
497 rctx->vertex_buffer_state.atom.dirty = true; 494 rctx->vertex_buffer_state.atom.dirty = true;
@@ -547,7 +544,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
547 struct r600_samplerview_state *state) 544 struct r600_samplerview_state *state)
548{ 545{
549 if (state->dirty_mask) { 546 if (state->dirty_mask) {
550 rctx->flags |= R600_CONTEXT_TEX_FLUSH; 547 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
551 state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) * 548 state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) *
552 util_bitcount(state->dirty_mask); 549 util_bitcount(state->dirty_mask);
553 state->atom.dirty = true; 550 state->atom.dirty = true;
@@ -889,7 +886,7 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
889void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) 886void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
890{ 887{
891 if (state->dirty_mask) { 888 if (state->dirty_mask) {
892 rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; 889 rctx->flags |= R600_CONTEXT_GPU_FLUSH;
893 state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20 890 state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
894 : util_bitcount(state->dirty_mask)*19; 891 : util_bitcount(state->dirty_mask)*19;
895 state->atom.dirty = true; 892 state->atom.dirty = true;