diff options
author | Jerome Glisse <jglisse@redhat.com> | 2012-11-01 16:09:40 -0400 |
---|---|---|
committer | Jerome Glisse <jglisse@redhat.com> | 2012-12-20 18:23:31 -0500 |
commit | 24b1206ab2dcd506aaac3ef656aebc8bc20cd27a (patch) | |
tree | 33102906acd5f0f9c749db048aa261271dc12e3b | |
parent | cf5632094ba0c19d570ea47025cf6da75ef8457a (diff) |
r600g: rework flushing and synchronization pattern v7
This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization pattern.
v2-v4: history lost somewhere in outer space
v5: Fix compute size of flushing, use define for flags, update
worst case cs size requirement for flush, treat rs780 and
newer as r7xx when it comes to streamout.
v6: Fix num dw computation for framebuffer state, remove dead
code, use define instead of hardcoded value.
v7: Remove dead code
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute_internal.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 178 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context_priv.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 19 |
8 files changed, 89 insertions, 162 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 66b0cc6d92c..ea75d805e06 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c | |||
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer( | |||
98 | 98 | ||
99 | /* The vertex instructions in the compute shaders use the texture cache, | 99 | /* The vertex instructions in the compute shaders use the texture cache, |
100 | * so we need to invalidate it. */ | 100 | * so we need to invalidate it. */ |
101 | rctx->flags |= R600_CONTEXT_TEX_FLUSH; | 101 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; |
102 | state->enabled_mask |= 1 << vb_index; | 102 | state->enabled_mask |= 1 << vb_index; |
103 | state->dirty_mask |= 1 << vb_index; | 103 | state->dirty_mask |= 1 << vb_index; |
104 | state->atom.dirty = true; | 104 | state->atom.dirty = true; |
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, | |||
329 | */ | 329 | */ |
330 | r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); | 330 | r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); |
331 | 331 | ||
332 | ctx->flags |= R600_CONTEXT_CB_FLUSH; | 332 | ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
333 | r600_flush_emit(ctx); | 333 | r600_flush_emit(ctx); |
334 | 334 | ||
335 | /* Emit colorbuffers. */ | 335 | /* Emit colorbuffers. */ |
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, | |||
409 | 409 | ||
410 | /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff | 410 | /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff |
411 | */ | 411 | */ |
412 | ctx->flags |= R600_CONTEXT_CB_FLUSH; | 412 | ctx->flags |= R600_CONTEXT_GPU_FLUSH; |
413 | r600_flush_emit(ctx); | 413 | r600_flush_emit(ctx); |
414 | 414 | ||
415 | #if 0 | 415 | #if 0 |
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader( | |||
468 | r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, | 468 | r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, |
469 | RADEON_USAGE_READ)); | 469 | RADEON_USAGE_READ)); |
470 | 470 | ||
471 | rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; | 471 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; |
472 | } | 472 | } |
473 | 473 | ||
474 | static void evergreen_launch_grid( | 474 | static void evergreen_launch_grid( |
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index f7aebf22374..94f556f38b3 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c | |||
@@ -545,7 +545,7 @@ void evergreen_set_tex_resource( | |||
545 | util_format_get_blockwidth(tmp->resource.b.b.format) * | 545 | util_format_get_blockwidth(tmp->resource.b.b.format) * |
546 | view->base.texture->width0*height*depth; | 546 | view->base.texture->width0*height*depth; |
547 | 547 | ||
548 | pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; | 548 | pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH; |
549 | 549 | ||
550 | evergreen_emit_force_reloc(res); | 550 | evergreen_emit_force_reloc(res); |
551 | evergreen_emit_force_reloc(res); | 551 | evergreen_emit_force_reloc(res); |
@@ -604,7 +604,7 @@ void evergreen_set_const_cache( | |||
604 | res->usage = RADEON_USAGE_READ; | 604 | res->usage = RADEON_USAGE_READ; |
605 | res->coher_bo_size = size; | 605 | res->coher_bo_size = size; |
606 | 606 | ||
607 | pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; | 607 | pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH; |
608 | } | 608 | } |
609 | 609 | ||
610 | struct r600_resource* r600_compute_buffer_alloc_vram( | 610 | struct r600_resource* r600_compute_buffer_alloc_vram( |
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 996c1b48b9a..58964c47675 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c | |||
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, | |||
1557 | uint32_t i, log_samples; | 1557 | uint32_t i, log_samples; |
1558 | 1558 | ||
1559 | if (rctx->framebuffer.state.nr_cbufs) { | 1559 | if (rctx->framebuffer.state.nr_cbufs) { |
1560 | rctx->flags |= R600_CONTEXT_CB_FLUSH; | 1560 | rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
1561 | 1561 | ||
1562 | if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { | 1562 | if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { |
1563 | rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; | 1563 | rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; |
1564 | } | 1564 | } |
1565 | } | 1565 | } |
1566 | if (rctx->framebuffer.state.zsbuf) { | 1566 | if (rctx->framebuffer.state.zsbuf) { |
1567 | rctx->flags |= R600_CONTEXT_DB_FLUSH; | 1567 | rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
1568 | } | 1568 | } |
1569 | 1569 | ||
1570 | util_copy_framebuffer_state(&rctx->framebuffer.state, state); | 1570 | util_copy_framebuffer_state(&rctx->framebuffer.state, state); |
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index d15cd5256fa..c351982aa27 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h | |||
@@ -182,17 +182,11 @@ struct r600_so_target { | |||
182 | unsigned so_index; | 182 | unsigned so_index; |
183 | }; | 183 | }; |
184 | 184 | ||
185 | #define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0) | 185 | #define R600_CONTEXT_GPU_FLUSH (1 << 0) |
186 | #define R600_CONTEXT_CB_FLUSH (1 << 1) | 186 | #define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1) |
187 | #define R600_CONTEXT_DB_FLUSH (1 << 2) | 187 | #define R600_CONTEXT_WAIT_IDLE (1 << 2) |
188 | #define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3) | 188 | #define R600_CONTEXT_FLUSH_AND_INV (1 << 3) |
189 | #define R600_CONTEXT_TEX_FLUSH (1 << 4) | 189 | #define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4) |
190 | #define R600_CONTEXT_VTX_FLUSH (1 << 5) | ||
191 | #define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6) | ||
192 | #define R600_CONTEXT_WAIT_IDLE (1 << 7) | ||
193 | #define R600_CONTEXT_FLUSH_AND_INV (1 << 8) | ||
194 | #define R600_CONTEXT_HTILE_ERRATA (1 << 9) | ||
195 | #define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 10) | ||
196 | 190 | ||
197 | struct r600_context; | 191 | struct r600_context; |
198 | struct r600_screen; | 192 | struct r600_screen; |
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index c7a357e15ed..8a22b885d2a 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c | |||
@@ -424,7 +424,7 @@ void r600_context_dirty_block(struct r600_context *ctx, | |||
424 | LIST_ADDTAIL(&block->list,&ctx->dirty); | 424 | LIST_ADDTAIL(&block->list,&ctx->dirty); |
425 | 425 | ||
426 | if (block->flags & REG_FLAG_FLUSH_CHANGE) { | 426 | if (block->flags & REG_FLAG_FLUSH_CHANGE) { |
427 | ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; | 427 | ctx->flags |= R600_CONTEXT_WAIT_IDLE; |
428 | } | 428 | } |
429 | } | 429 | } |
430 | } | 430 | } |
@@ -595,16 +595,13 @@ out: | |||
595 | void r600_flush_emit(struct r600_context *rctx) | 595 | void r600_flush_emit(struct r600_context *rctx) |
596 | { | 596 | { |
597 | struct radeon_winsys_cs *cs = rctx->cs; | 597 | struct radeon_winsys_cs *cs = rctx->cs; |
598 | unsigned cp_coher_cntl = 0; | ||
599 | unsigned emit_flush = 0; | ||
598 | 600 | ||
599 | if (!rctx->flags) { | 601 | if (!rctx->flags) { |
600 | return; | 602 | return; |
601 | } | 603 | } |
602 | 604 | ||
603 | if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) { | ||
604 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); | ||
605 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); | ||
606 | } | ||
607 | |||
608 | if (rctx->chip_class >= R700 && | 605 | if (rctx->chip_class >= R700 && |
609 | (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) { | 606 | (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) { |
610 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); | 607 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); |
@@ -614,110 +611,54 @@ void r600_flush_emit(struct r600_context *rctx) | |||
614 | if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { | 611 | if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { |
615 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); | 612 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); |
616 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); | 613 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); |
617 | 614 | if (rctx->chip_class >= EVERGREEN) { | |
618 | /* DB flushes are special due to errata with hyperz, we need to | 615 | cp_coher_cntl = S_0085F0_CB0_DEST_BASE_ENA(1) | |
619 | * insert a no-op, so that the cache has time to really flush. | 616 | S_0085F0_CB1_DEST_BASE_ENA(1) | |
620 | */ | 617 | S_0085F0_CB2_DEST_BASE_ENA(1) | |
621 | if (rctx->chip_class <= R700 && | 618 | S_0085F0_CB3_DEST_BASE_ENA(1) | |
622 | rctx->flags & R600_CONTEXT_HTILE_ERRATA) { | 619 | S_0085F0_CB4_DEST_BASE_ENA(1) | |
623 | cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); | 620 | S_0085F0_CB5_DEST_BASE_ENA(1) | |
624 | cs->buf[cs->cdw++] = 0xdeadcafe; | 621 | S_0085F0_CB6_DEST_BASE_ENA(1) | |
625 | cs->buf[cs->cdw++] = 0xdeadcafe; | 622 | S_0085F0_CB7_DEST_BASE_ENA(1) | |
626 | cs->buf[cs->cdw++] = 0xdeadcafe; | 623 | S_0085F0_CB8_DEST_BASE_ENA(1) | |
627 | cs->buf[cs->cdw++] = 0xdeadcafe; | 624 | S_0085F0_CB9_DEST_BASE_ENA(1) | |
628 | cs->buf[cs->cdw++] = 0xdeadcafe; | 625 | S_0085F0_CB10_DEST_BASE_ENA(1) | |
629 | cs->buf[cs->cdw++] = 0xdeadcafe; | 626 | S_0085F0_CB11_DEST_BASE_ENA(1) | |
630 | cs->buf[cs->cdw++] = 0xdeadcafe; | 627 | S_0085F0_DB_DEST_BASE_ENA(1) | |
631 | cs->buf[cs->cdw++] = 0xdeadcafe; | 628 | S_0085F0_TC_ACTION_ENA(1) | |
632 | cs->buf[cs->cdw++] = 0xdeadcafe; | 629 | S_0085F0_CB_ACTION_ENA(1) | |
633 | cs->buf[cs->cdw++] = 0xdeadcafe; | 630 | S_0085F0_DB_ACTION_ENA(1) | |
634 | cs->buf[cs->cdw++] = 0xdeadcafe; | 631 | S_0085F0_SH_ACTION_ENA(1) | |
635 | cs->buf[cs->cdw++] = 0xdeadcafe; | 632 | S_0085F0_SMX_ACTION_ENA(1) | |
636 | cs->buf[cs->cdw++] = 0xdeadcafe; | 633 | (1 << 20); /* unknown bit */ |
637 | cs->buf[cs->cdw++] = 0xdeadcafe; | 634 | } else { |
638 | cs->buf[cs->cdw++] = 0xdeadcafe; | 635 | cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) | |
639 | cs->buf[cs->cdw++] = 0xdeadcafe; | 636 | S_0085F0_SH_ACTION_ENA(1) | |
640 | cs->buf[cs->cdw++] = 0xdeadcafe; | 637 | S_0085F0_VC_ACTION_ENA(1) | |
641 | cs->buf[cs->cdw++] = 0xdeadcafe; | 638 | S_0085F0_TC_ACTION_ENA(1) | |
642 | cs->buf[cs->cdw++] = 0xdeadcafe; | 639 | (1 << 20); /* unknown bit */ |
643 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
644 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
645 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
646 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
647 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
648 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
649 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
650 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
651 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
652 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
653 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
654 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
655 | cs->buf[cs->cdw++] = 0xdeadcafe; | ||
656 | } | 640 | } |
657 | } | 641 | } |
658 | 642 | ||
659 | if (rctx->flags & (R600_CONTEXT_CB_FLUSH | | 643 | if (rctx->flags & R600_CONTEXT_GPU_FLUSH) { |
660 | R600_CONTEXT_DB_FLUSH | | 644 | cp_coher_cntl |= S_0085F0_VC_ACTION_ENA(1) | |
661 | R600_CONTEXT_SHADERCONST_FLUSH | | 645 | S_0085F0_TC_ACTION_ENA(1) | |
662 | R600_CONTEXT_TEX_FLUSH | | 646 | (1 << 20); /* unknown bit */ |
663 | R600_CONTEXT_VTX_FLUSH | | 647 | emit_flush = 1; |
664 | R600_CONTEXT_STREAMOUT_FLUSH)) { | 648 | } |
665 | /* anything left (cb, vtx, shader, streamout) can be flushed | ||
666 | * using the surface sync packet | ||
667 | */ | ||
668 | unsigned flags = 0; | ||
669 | |||
670 | if (rctx->flags & R600_CONTEXT_CB_FLUSH) { | ||
671 | flags |= S_0085F0_CB_ACTION_ENA(1) | | ||
672 | S_0085F0_CB0_DEST_BASE_ENA(1) | | ||
673 | S_0085F0_CB1_DEST_BASE_ENA(1) | | ||
674 | S_0085F0_CB2_DEST_BASE_ENA(1) | | ||
675 | S_0085F0_CB3_DEST_BASE_ENA(1) | | ||
676 | S_0085F0_CB4_DEST_BASE_ENA(1) | | ||
677 | S_0085F0_CB5_DEST_BASE_ENA(1) | | ||
678 | S_0085F0_CB6_DEST_BASE_ENA(1) | | ||
679 | S_0085F0_CB7_DEST_BASE_ENA(1); | ||
680 | |||
681 | if (rctx->chip_class >= EVERGREEN) { | ||
682 | flags |= S_0085F0_CB8_DEST_BASE_ENA(1) | | ||
683 | S_0085F0_CB9_DEST_BASE_ENA(1) | | ||
684 | S_0085F0_CB10_DEST_BASE_ENA(1) | | ||
685 | S_0085F0_CB11_DEST_BASE_ENA(1); | ||
686 | } | ||
687 | |||
688 | /* RV670 errata | ||
689 | * (CB1_DEST_BASE_ENA is also required, which is | ||
690 | * included unconditionally above). */ | ||
691 | if (rctx->family == CHIP_RV670 || | ||
692 | rctx->family == CHIP_RS780 || | ||
693 | rctx->family == CHIP_RS880) { | ||
694 | flags |= S_0085F0_DEST_BASE_0_ENA(1); | ||
695 | } | ||
696 | } | ||
697 | |||
698 | if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { | ||
699 | flags |= S_0085F0_SO0_DEST_BASE_ENA(1) | | ||
700 | S_0085F0_SO1_DEST_BASE_ENA(1) | | ||
701 | S_0085F0_SO2_DEST_BASE_ENA(1) | | ||
702 | S_0085F0_SO3_DEST_BASE_ENA(1) | | ||
703 | S_0085F0_SMX_ACTION_ENA(1); | ||
704 | |||
705 | /* RV670 errata */ | ||
706 | if (rctx->family == CHIP_RV670 || | ||
707 | rctx->family == CHIP_RS780 || | ||
708 | rctx->family == CHIP_RS880) { | ||
709 | flags |= S_0085F0_DEST_BASE_0_ENA(1); | ||
710 | } | ||
711 | } | ||
712 | 649 | ||
713 | flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) | | 650 | if (rctx->family >= CHIP_RV770 && rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { |
714 | S_0085F0_DB_DEST_BASE_ENA(1): 0; | 651 | cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) | |
715 | flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0; | 652 | S_0085F0_SO1_DEST_BASE_ENA(1) | |
716 | flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0; | 653 | S_0085F0_SO2_DEST_BASE_ENA(1) | |
717 | flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0; | 654 | S_0085F0_SO3_DEST_BASE_ENA(1) | |
655 | S_0085F0_SMX_ACTION_ENA(1); | ||
656 | emit_flush = 1; | ||
657 | } | ||
718 | 658 | ||
659 | if (emit_flush) { | ||
719 | cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); | 660 | cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); |
720 | cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ | 661 | cs->buf[cs->cdw++] = cp_coher_cntl; /* CP_COHER_CNTL */ |
721 | cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ | 662 | cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ |
722 | cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ | 663 | cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ |
723 | cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ | 664 | cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ |
@@ -758,16 +699,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) | |||
758 | ctx->streamout_suspended = true; | 699 | ctx->streamout_suspended = true; |
759 | } | 700 | } |
760 | 701 | ||
761 | /* partial flush is needed to avoid lockups on some chips with user fences */ | 702 | /* flush is needed to avoid lockups on some chips with user fences |
762 | ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; | 703 | * this will also flush the framebuffer cache |
763 | 704 | */ | |
764 | /* flush the framebuffer */ | 705 | ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
765 | ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH; | ||
766 | |||
767 | /* R6xx errata */ | ||
768 | if (ctx->chip_class == R600) { | ||
769 | ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; | ||
770 | } | ||
771 | 706 | ||
772 | r600_flush_emit(ctx); | 707 | r600_flush_emit(ctx); |
773 | 708 | ||
@@ -884,9 +819,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen | |||
884 | va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); | 819 | va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); |
885 | va = va + (offset << 2); | 820 | va = va + (offset << 2); |
886 | 821 | ||
887 | ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH; | 822 | r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); |
888 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); | ||
889 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); | ||
890 | 823 | ||
891 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); | 824 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); |
892 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); | 825 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); |
@@ -1073,15 +1006,14 @@ void r600_context_streamout_end(struct r600_context *ctx) | |||
1073 | } | 1006 | } |
1074 | 1007 | ||
1075 | if (ctx->chip_class >= EVERGREEN) { | 1008 | if (ctx->chip_class >= EVERGREEN) { |
1009 | ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; | ||
1076 | evergreen_set_streamout_enable(ctx, 0); | 1010 | evergreen_set_streamout_enable(ctx, 0); |
1077 | } else { | 1011 | } else { |
1012 | if (ctx->chip_class >= R700) { | ||
1013 | ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; | ||
1014 | } | ||
1078 | r600_set_streamout_enable(ctx, 0); | 1015 | r600_set_streamout_enable(ctx, 0); |
1079 | } | 1016 | } |
1080 | ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; | 1017 | ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
1081 | |||
1082 | /* R6xx errata */ | ||
1083 | if (ctx->chip_class == R600) { | ||
1084 | ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; | ||
1085 | } | ||
1086 | ctx->num_cs_dw_streamout_end = 0; | 1018 | ctx->num_cs_dw_streamout_end = 0; |
1087 | } | 1019 | } |
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h index c59a9abf3f9..050c472fa8c 100644 --- a/src/gallium/drivers/r600/r600_hw_context_priv.h +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h | |||
@@ -29,7 +29,7 @@ | |||
29 | #include "r600_pipe.h" | 29 | #include "r600_pipe.h" |
30 | 30 | ||
31 | /* the number of CS dwords for flushing and drawing */ | 31 | /* the number of CS dwords for flushing and drawing */ |
32 | #define R600_MAX_FLUSH_CS_DWORDS 46 | 32 | #define R600_MAX_FLUSH_CS_DWORDS 12 |
33 | #define R600_MAX_DRAW_CS_DWORDS 34 | 33 | #define R600_MAX_DRAW_CS_DWORDS 34 |
34 | 34 | ||
35 | /* these flags are used in register flags and added into block flags */ | 35 | /* these flags are used in register flags and added into block flags */ |
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 9bfae4f2299..f969808603b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c | |||
@@ -1452,7 +1452,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, | |||
1452 | unsigned i; | 1452 | unsigned i; |
1453 | 1453 | ||
1454 | if (rctx->framebuffer.state.nr_cbufs) { | 1454 | if (rctx->framebuffer.state.nr_cbufs) { |
1455 | rctx->flags |= R600_CONTEXT_CB_FLUSH; | 1455 | rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
1456 | 1456 | ||
1457 | if (rctx->chip_class >= R700 && | 1457 | if (rctx->chip_class >= R700 && |
1458 | rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { | 1458 | rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { |
@@ -1460,11 +1460,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, | |||
1460 | } | 1460 | } |
1461 | } | 1461 | } |
1462 | if (rctx->framebuffer.state.zsbuf) { | 1462 | if (rctx->framebuffer.state.zsbuf) { |
1463 | rctx->flags |= R600_CONTEXT_DB_FLUSH; | 1463 | rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; |
1464 | } | ||
1465 | /* R6xx errata */ | ||
1466 | if (rctx->chip_class == R600) { | ||
1467 | rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; | ||
1468 | } | 1464 | } |
1469 | 1465 | ||
1470 | /* Set the new state. */ | 1466 | /* Set the new state. */ |
@@ -1558,7 +1554,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, | |||
1558 | 1554 | ||
1559 | } | 1555 | } |
1560 | if (rctx->framebuffer.state.zsbuf) { | 1556 | if (rctx->framebuffer.state.zsbuf) { |
1561 | rctx->framebuffer.atom.num_dw += 16; | 1557 | rctx->framebuffer.atom.num_dw += 18; |
1562 | } else if (rctx->screen->info.drm_minor >= 18) { | 1558 | } else if (rctx->screen->info.drm_minor >= 18) { |
1563 | rctx->framebuffer.atom.num_dw += 3; | 1559 | rctx->framebuffer.atom.num_dw += 3; |
1564 | } | 1560 | } |
@@ -1742,6 +1738,13 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a | |||
1742 | sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs); | 1738 | sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs); |
1743 | } | 1739 | } |
1744 | 1740 | ||
1741 | /* SURFACE_BASE_UPDATE */ | ||
1742 | if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) { | ||
1743 | r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); | ||
1744 | r600_write_value(cs, sbu); | ||
1745 | sbu = 0; | ||
1746 | } | ||
1747 | |||
1745 | /* Zbuffer. */ | 1748 | /* Zbuffer. */ |
1746 | if (state->zsbuf) { | 1749 | if (state->zsbuf) { |
1747 | struct r600_surface *surf = (struct r600_surface*)state->zsbuf; | 1750 | struct r600_surface *surf = (struct r600_surface*)state->zsbuf; |
@@ -1775,6 +1778,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a | |||
1775 | if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) { | 1778 | if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) { |
1776 | r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); | 1779 | r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); |
1777 | r600_write_value(cs, sbu); | 1780 | r600_write_value(cs, sbu); |
1781 | sbu = 0; | ||
1778 | } | 1782 | } |
1779 | 1783 | ||
1780 | /* Framebuffer dimensions. */ | 1784 | /* Framebuffer dimensions. */ |
@@ -2243,7 +2247,7 @@ bool r600_adjust_gprs(struct r600_context *rctx) | |||
2243 | if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { | 2247 | if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { |
2244 | rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; | 2248 | rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; |
2245 | rctx->config_state.atom.dirty = true; | 2249 | rctx->config_state.atom.dirty = true; |
2246 | rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; | 2250 | rctx->flags |= R600_CONTEXT_WAIT_IDLE; |
2247 | } | 2251 | } |
2248 | return true; | 2252 | return true; |
2249 | } | 2253 | } |
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b20f6550c1c..c69149bf0b6 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c | |||
@@ -88,12 +88,9 @@ static void r600_texture_barrier(struct pipe_context *ctx) | |||
88 | { | 88 | { |
89 | struct r600_context *rctx = (struct r600_context *)ctx; | 89 | struct r600_context *rctx = (struct r600_context *)ctx; |
90 | 90 | ||
91 | rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH; | 91 | rctx->flags |= R600_CONTEXT_WAIT_IDLE; |
92 | 92 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; | |
93 | /* R6xx errata */ | 93 | rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; |
94 | if (rctx->chip_class == R600) { | ||
95 | rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; | ||
96 | } | ||
97 | } | 94 | } |
98 | 95 | ||
99 | static unsigned r600_conv_pipe_prim(unsigned prim) | 96 | static unsigned r600_conv_pipe_prim(unsigned prim) |
@@ -360,7 +357,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx, | |||
360 | { | 357 | { |
361 | if (state->dirty_mask) { | 358 | if (state->dirty_mask) { |
362 | if (state->dirty_mask & state->has_bordercolor_mask) { | 359 | if (state->dirty_mask & state->has_bordercolor_mask) { |
363 | rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; | 360 | rctx->flags |= R600_CONTEXT_WAIT_IDLE; |
364 | } | 361 | } |
365 | state->atom.num_dw = | 362 | state->atom.num_dw = |
366 | util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + | 363 | util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + |
@@ -423,7 +420,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, | |||
423 | seamless_cube_map != -1 && | 420 | seamless_cube_map != -1 && |
424 | seamless_cube_map != rctx->seamless_cube_map.enabled) { | 421 | seamless_cube_map != rctx->seamless_cube_map.enabled) { |
425 | /* change in TA_CNTL_AUX need a pipeline flush */ | 422 | /* change in TA_CNTL_AUX need a pipeline flush */ |
426 | rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; | 423 | rctx->flags |= R600_CONTEXT_WAIT_IDLE; |
427 | rctx->seamless_cube_map.enabled = seamless_cube_map; | 424 | rctx->seamless_cube_map.enabled = seamless_cube_map; |
428 | rctx->seamless_cube_map.atom.dirty = true; | 425 | rctx->seamless_cube_map.atom.dirty = true; |
429 | } | 426 | } |
@@ -491,7 +488,7 @@ static void r600_set_index_buffer(struct pipe_context *ctx, | |||
491 | void r600_vertex_buffers_dirty(struct r600_context *rctx) | 488 | void r600_vertex_buffers_dirty(struct r600_context *rctx) |
492 | { | 489 | { |
493 | if (rctx->vertex_buffer_state.dirty_mask) { | 490 | if (rctx->vertex_buffer_state.dirty_mask) { |
494 | rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH; | 491 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; |
495 | rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * | 492 | rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * |
496 | util_bitcount(rctx->vertex_buffer_state.dirty_mask); | 493 | util_bitcount(rctx->vertex_buffer_state.dirty_mask); |
497 | rctx->vertex_buffer_state.atom.dirty = true; | 494 | rctx->vertex_buffer_state.atom.dirty = true; |
@@ -547,7 +544,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, | |||
547 | struct r600_samplerview_state *state) | 544 | struct r600_samplerview_state *state) |
548 | { | 545 | { |
549 | if (state->dirty_mask) { | 546 | if (state->dirty_mask) { |
550 | rctx->flags |= R600_CONTEXT_TEX_FLUSH; | 547 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; |
551 | state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) * | 548 | state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) * |
552 | util_bitcount(state->dirty_mask); | 549 | util_bitcount(state->dirty_mask); |
553 | state->atom.dirty = true; | 550 | state->atom.dirty = true; |
@@ -889,7 +886,7 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state) | |||
889 | void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) | 886 | void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) |
890 | { | 887 | { |
891 | if (state->dirty_mask) { | 888 | if (state->dirty_mask) { |
892 | rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; | 889 | rctx->flags |= R600_CONTEXT_GPU_FLUSH; |
893 | state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20 | 890 | state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20 |
894 | : util_bitcount(state->dirty_mask)*19; | 891 | : util_bitcount(state->dirty_mask)*19; |
895 | state->atom.dirty = true; | 892 | state->atom.dirty = true; |