summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jerome Glisse <jglisse@redhat.com> 2013-01-29 12:52:17 -0500
committer: Jerome Glisse <jglisse@redhat.com> 2013-01-31 14:23:52 -0500
commit: 5e0c956cb219e54dfc22e64ac3f00e22619c763f (patch)
tree: 90ed914596c27cb1d6fdf453b2fc4bcf91cea8de
parent: 5c86a728d4f688c0fe7fbf9f4b8f88060b65c4ee (diff)
r600g: add cs memory usage accounting and limit it v3
We are now seeing cs that can go over the vram+gtt size. To avoid failing, flush early any cs that goes over 70% (gtt+vram) usage. 70% is used to allow for some fragmentation. The idea is to compute a gross estimate of the memory requirement of each draw call. After each draw call, memory will be precisely accounted, so the uncertainty is only on the current draw call. In practice this gave a very good estimate (+/- 10% of the target memory limit). v2: Remove leftover from testing version, remove useless NULL checking. Improve commit message. v3: Add comment to code on memory accounting precision. Signed-off-by: Jerome Glisse <jglisse@redhat.com> Reviewed-by: Marek Olšák <maraeo@gmail.com>
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 4
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c 12
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 28
-rw-r--r-- src/gallium/drivers/r600/r600_state.c 3
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 13
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c 11
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_winsys.h 10
7 files changed, 80 insertions, 1 deletions
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0a3861f30f3..5dd8b130e9e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
1668 surf = (struct r600_surface*)state->cbufs[i]; 1668 surf = (struct r600_surface*)state->cbufs[i];
1669 rtex = (struct r600_texture*)surf->base.texture; 1669 rtex = (struct r600_texture*)surf->base.texture;
1670 1670
1671 r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
1672
1671 if (!surf->color_initialized) { 1673 if (!surf->color_initialized) {
1672 evergreen_init_color_surface(rctx, surf); 1674 evergreen_init_color_surface(rctx, surf);
1673 } 1675 }
@@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
1699 if (state->zsbuf) { 1701 if (state->zsbuf) {
1700 surf = (struct r600_surface*)state->zsbuf; 1702 surf = (struct r600_surface*)state->zsbuf;
1701 1703
1704 r600_context_add_resource_size(ctx, state->zsbuf->texture);
1705
1702 if (!surf->depth_initialized) { 1706 if (!surf->depth_initialized) {
1703 evergreen_init_depth_surface(rctx, surf); 1707 evergreen_init_depth_surface(rctx, surf);
1704 } 1708 }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 23f488a5e7c..a89f23069d3 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -359,6 +359,16 @@ out_err:
359void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, 359void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
360 boolean count_draw_in) 360 boolean count_draw_in)
361{ 361{
362 if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
363 ctx->gtt = 0;
364 ctx->vram = 0;
365 ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
366 return;
367 }
368 /* all will be accounted once relocation are emited */
369 ctx->gtt = 0;
370 ctx->vram = 0;
371
362 /* The number of dwords we already used in the CS so far. */ 372 /* The number of dwords we already used in the CS so far. */
363 num_dw += ctx->rings.gfx.cs->cdw; 373 num_dw += ctx->rings.gfx.cs->cdw;
364 374
@@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
784 794
785 ctx->pm4_dirty_cdwords = 0; 795 ctx->pm4_dirty_cdwords = 0;
786 ctx->flags = 0; 796 ctx->flags = 0;
797 ctx->gtt = 0;
798 ctx->vram = 0;
787 799
788 /* Begin a new CS. */ 800 /* Begin a new CS. */
789 r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd); 801 r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 3ff42d38f15..ec59c929524 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,10 @@ struct r600_context {
447 unsigned backend_mask; 447 unsigned backend_mask;
448 unsigned max_db; /* for OQ */ 448 unsigned max_db; /* for OQ */
449 449
450 /* current unaccounted memory usage */
451 uint64_t vram;
452 uint64_t gtt;
453
450 /* Miscellaneous state objects. */ 454 /* Miscellaneous state objects. */
451 void *custom_dsa_flush; 455 void *custom_dsa_flush;
452 void *custom_blend_resolve; 456 void *custom_blend_resolve;
@@ -998,4 +1002,28 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
998 } 1002 }
999} 1003}
1000 1004
1005static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
1006{
1007 struct r600_context *rctx = (struct r600_context *)ctx;
1008 struct r600_resource *rr = (struct r600_resource *)r;
1009
1010 if (r == NULL) {
1011 return;
1012 }
1013
1014 /*
1015 * The idea is to compute a gross estimate of memory requirement of
1016 * each draw call. After each draw call, memory will be precisely
1017 * accounted. So the uncertainty is only on the current draw call.
1018 * In practice this gave very good estimate (+/- 10% of the target
1019 * memory limit).
1020 */
1021 if (rr->domains & RADEON_DOMAIN_GTT) {
1022 rctx->gtt += rr->buf->size;
1023 }
1024 if (rr->domains & RADEON_DOMAIN_VRAM) {
1025 rctx->vram += rr->buf->size;
1026 }
1027}
1028
1001#endif 1029#endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c0bc2a5b9c1..44cd00ead8a 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
1544 1544
1545 surf = (struct r600_surface*)state->cbufs[i]; 1545 surf = (struct r600_surface*)state->cbufs[i];
1546 rtex = (struct r600_texture*)surf->base.texture; 1546 rtex = (struct r600_texture*)surf->base.texture;
1547 r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
1547 1548
1548 if (!surf->color_initialized || force_cmask_fmask) { 1549 if (!surf->color_initialized || force_cmask_fmask) {
1549 r600_init_color_surface(rctx, surf, force_cmask_fmask); 1550 r600_init_color_surface(rctx, surf, force_cmask_fmask);
@@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
1576 if (state->zsbuf) { 1577 if (state->zsbuf) {
1577 surf = (struct r600_surface*)state->zsbuf; 1578 surf = (struct r600_surface*)state->zsbuf;
1578 1579
1580 r600_context_add_resource_size(ctx, state->zsbuf->texture);
1581
1579 if (!surf->depth_initialized) { 1582 if (!surf->depth_initialized) {
1580 r600_init_depth_surface(rctx, surf); 1583 r600_init_depth_surface(rctx, surf);
1581 } 1584 }
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9386f618b3a..33200a6d178 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
479 479
480 if (ib) { 480 if (ib) {
481 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); 481 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
482 memcpy(&rctx->index_buffer, ib, sizeof(*ib)); 482 memcpy(&rctx->index_buffer, ib, sizeof(*ib));
483 r600_context_add_resource_size(ctx, ib->buffer);
483 } else { 484 } else {
484 pipe_resource_reference(&rctx->index_buffer.buffer, NULL); 485 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
485 } 486 }
@@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
516 vb[i].buffer_offset = input[i].buffer_offset; 517 vb[i].buffer_offset = input[i].buffer_offset;
517 pipe_resource_reference(&vb[i].buffer, input[i].buffer); 518 pipe_resource_reference(&vb[i].buffer, input[i].buffer);
518 new_buffer_mask |= 1 << i; 519 new_buffer_mask |= 1 << i;
520 r600_context_add_resource_size(ctx, input[i].buffer);
519 } else { 521 } else {
520 pipe_resource_reference(&vb[i].buffer, NULL); 522 pipe_resource_reference(&vb[i].buffer, NULL);
521 disable_mask |= 1 << i; 523 disable_mask |= 1 << i;
@@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
613 615
614 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]); 616 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
615 new_mask |= 1 << i; 617 new_mask |= 1 << i;
618 r600_context_add_resource_size(pipe, views[i]->texture);
616 } else { 619 } else {
617 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL); 620 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
618 disable_mask |= 1 << i; 621 disable_mask |= 1 << i;
@@ -806,6 +809,8 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
806 rctx->ps_shader = (struct r600_pipe_shader_selector *)state; 809 rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
807 r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); 810 r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
808 811
812 r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
813
809 if (rctx->chip_class <= R700) { 814 if (rctx->chip_class <= R700) {
810 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all; 815 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
811 816
@@ -835,6 +840,8 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
835 if (state) { 840 if (state) {
836 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate); 841 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
837 842
843 r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
844
838 /* Update clip misc state. */ 845 /* Update clip misc state. */
839 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || 846 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
840 rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { 847 rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
@@ -938,10 +945,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
938 } else { 945 } else {
939 u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer); 946 u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
940 } 947 }
948 /* account it in gtt */
949 rctx->gtt += input->buffer_size;
941 } else { 950 } else {
942 /* Setup the hw buffer. */ 951 /* Setup the hw buffer. */
943 cb->buffer_offset = input->buffer_offset; 952 cb->buffer_offset = input->buffer_offset;
944 pipe_resource_reference(&cb->buffer, input->buffer); 953 pipe_resource_reference(&cb->buffer, input->buffer);
954 r600_context_add_resource_size(ctx, input->buffer);
945 } 955 }
946 956
947 state->enabled_mask |= 1 << index; 957 state->enabled_mask |= 1 << index;
@@ -1004,6 +1014,7 @@ static void r600_set_so_targets(struct pipe_context *ctx,
1004 /* Set the new targets. */ 1014 /* Set the new targets. */
1005 for (i = 0; i < num_targets; i++) { 1015 for (i = 0; i < num_targets; i++) {
1006 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); 1016 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
1017 r600_context_add_resource_size(ctx, targets[i]->buffer);
1007 } 1018 }
1008 for (; i < rctx->num_so_targets; i++) { 1019 for (; i < rctx->num_so_targets; i++) {
1009 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); 1020 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index cab27040bba..6a7115ba76b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -383,6 +383,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
383 return status; 383 return status;
384} 384}
385 385
386static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
387{
388 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
389 boolean status =
390 (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
391 (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
392
393 return status;
394}
395
386static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, 396static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
387 struct radeon_winsys_cs_handle *buf) 397 struct radeon_winsys_cs_handle *buf)
388{ 398{
@@ -575,6 +585,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
575 ws->base.cs_destroy = radeon_drm_cs_destroy; 585 ws->base.cs_destroy = radeon_drm_cs_destroy;
576 ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; 586 ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
577 ws->base.cs_validate = radeon_drm_cs_validate; 587 ws->base.cs_validate = radeon_drm_cs_validate;
588 ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
578 ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; 589 ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
579 ws->base.cs_flush = radeon_drm_cs_flush; 590 ws->base.cs_flush = radeon_drm_cs_flush;
580 ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; 591 ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 7fdef3fad87..8b64ef2e1a1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -393,6 +393,16 @@ struct radeon_winsys {
393 boolean (*cs_validate)(struct radeon_winsys_cs *cs); 393 boolean (*cs_validate)(struct radeon_winsys_cs *cs);
394 394
395 /** 395 /**
396 * Return TRUE if there is enough memory in VRAM and GTT for the relocs
397 * added so far.
398 *
399 * \param cs A command stream to validate.
400 * \param vram VRAM memory size pending to be use
401 * \param gtt GTT memory size pending to be use
402 */
403 boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
404
405 /**
396 * Write a relocated dword to a command buffer. 406 * Write a relocated dword to a command buffer.
397 * 407 *
398 * \param cs A command stream the relocation is written to. 408 * \param cs A command stream the relocation is written to.