summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2013-01-12 03:29:40 +0100
committerMarek Olšák <maraeo@gmail.com>2013-01-15 21:48:58 +0100
commitca2c28859eca83f8fbf1f43616f5ef861e95e8d6 (patch)
treece839793a720913a19cf69f1e6734fdab3e016ab
parent1dfe8eead95613a7db62dd17d3da56884b5a887e (diff)
r300g: implement MSAA compression and fast MSAA color clear
These are optimizations which make MSAA a lot faster. The MSAA work is complete with this commit (except for enablement of AA optimizations for RGBA16F, for which a patch is ready and waiting until the kernel CS checker fix lands). MSAA can't be made any faster as far as hw programming is concerned.
The catch is that only one process and one colorbuffer can use the optimizations at a time. There usually is only one MSAA colorbuffer, so it shouldn't be an issue.
Also, there is a limit on the size of MSAA colorbuffer resolution in terms of megapixels. If the limit is surpassed, the AA optimizations are disabled. The limit is:
- 1 Mpix on low-end and some mid-level chipsets (1024x768 and 1280x720)
- 2 Mpix on some mid-level chipsets (1600x1200 and 1920x1080)
- 3 or 4 Mpix on high-end chipsets (2048x1536 or 2560x1600, respectively)
It corresponds to the number of raster pipes (= GB pipes) available; each pipe can hold 1 Mpix of AA compression data.
If it's enabled, the driver prints to stdout: radeon: Acquired access to AA optimizations.
-rw-r--r--src/gallium/drivers/r300/r300_blit.c68
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h1
-rw-r--r--src/gallium/drivers/r300/r300_context.c4
-rw-r--r--src/gallium/drivers/r300/r300_context.h17
-rw-r--r--src/gallium/drivers/r300/r300_debug.c1
-rw-r--r--src/gallium/drivers/r300/r300_emit.c37
-rw-r--r--src/gallium/drivers/r300/r300_emit.h1
-rw-r--r--src/gallium/drivers/r300/r300_reg.h18
-rw-r--r--src/gallium/drivers/r300/r300_screen.c3
-rw-r--r--src/gallium/drivers/r300/r300_screen.h6
-rw-r--r--src/gallium/drivers/r300/r300_state.c9
-rw-r--r--src/gallium/drivers/r300/r300_texture.c9
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c43
13 files changed, 211 insertions, 6 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index f8d3b1fd1d1..2bb6063846b 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -28,2 +28,3 @@
#include "util/u_format.h"
+#include "util/u_half.h"
#include "util/u_pack_color.h"
@@ -178,2 +179,21 @@ static uint32_t r300_hiz_clear_value(double depth)
+static void r300_set_clear_color(struct r300_context *r300,
+ const union pipe_color_union *color)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ union util_color uc;
+
+ memset(&uc, 0, sizeof(uc));
+ util_pack_color(color->f, fb->cbufs[0]->format, &uc); /* pack using colorbuffer 0's format */
+
+ if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
+ /* Half-float channels uc.h[0..3] map to (B,G,R,A); two channels are packed per dword. */
+ r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16);
+ r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16);
+ } else {
+ r300->color_clear_value = uc.ui;
+ }
+}
+
DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE)
@@ -289,4 +309,40 @@ static void r300_clear(struct pipe_context* pipe,
+ /* Use fast color clear for an AA colorbuffer.
+ * The CMASK is shared between all colorbuffers, so we use it
+ * if there is only one colorbuffer bound. */
+ if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
+ r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
+ /* Try to obtain access to the CMASK if we don't have it yet. */
+ if (!r300->cmask_access) {
+ r300->cmask_access =
+ r300->rws->cs_request_feature(r300->cs,
+ RADEON_FID_R300_CMASK_ACCESS,
+ TRUE);
+ }
+
+ /* Setup the clear. */
+ if (r300->cmask_access) {
+ /* Pair the resource with the CMASK to avoid other resources
+ * accessing it. */
+ if (!r300->screen->cmask_resource) {
+ pipe_mutex_lock(r300->screen->cmask_mutex);
+ /* Double checking (first unlocked, then locked). */
+ if (!r300->screen->cmask_resource) {
+ /* Don't reference this, so that the texture can be
+ * destroyed while set in cmask_resource.
+ * Then in texture_destroy, we set cmask_resource to NULL. */
+ r300->screen->cmask_resource = fb->cbufs[0]->texture;
+ }
+ pipe_mutex_unlock(r300->screen->cmask_mutex);
+ }
+
+ if (r300->screen->cmask_resource == fb->cbufs[0]->texture) {
+ r300_set_clear_color(r300, color);
+ r300_mark_atom_dirty(r300, &r300->cmask_clear);
+ buffers &= ~PIPE_CLEAR_COLOR;
+ }
+ }
+ }
/* Enable CBZB clear. */
- if (r300_cbzb_clear_allowed(r300, buffers)) {
+ else if (r300_cbzb_clear_allowed(r300, buffers)) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
@@ -314,3 +370,5 @@ static void r300_clear(struct pipe_context* pipe,
r300_blitter_end(r300);
- } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
+ } else if (r300->zmask_clear.dirty ||
+ r300->hiz_clear.dirty ||
+ r300->cmask_clear.dirty) {
/* Just clear zmask and hiz now, this does not use the standard draw
@@ -321,2 +379,3 @@ static void r300_clear(struct pipe_context* pipe,
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ (r300->cmask_clear.dirty ? r300->cmask_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
@@ -339,2 +398,7 @@ static void r300_clear(struct pipe_context* pipe,
}
+ if (r300->cmask_clear.dirty) {
+ r300_emit_cmask_clear(r300, r300->cmask_clear.size,
+ r300->cmask_clear.state);
+ r300->cmask_clear.dirty = FALSE;
+ }
} else {
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index f8b5d4e3d3e..996491e9431 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -32,2 +32,3 @@
/* rv3xx have only one pipe */
+#define PIPE_CMASK_SIZE 4096
#define PIPE_ZMASK_SIZE 4096
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index c744fea10cf..a6fccc6e02c 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -77,2 +77,5 @@ static void r300_destroy_context(struct pipe_context* context)
}
+ if (r300->cs && r300->cmask_access) {
+ r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE);
+ }
@@ -205,2 +208,3 @@ static boolean r300_setup_atoms(struct r300_context* r300)
R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 6 : 0);
+ R300_INIT_ATOM(cmask_clear, 6);
/* ZB (unpipelined), SU. */
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 1b912c3eeee..33851519bd8 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -311,2 +311,3 @@ struct r300_surface {
uint32_t pitch_hiz; /* HIZ_PITCH */
+ uint32_t pitch_cmask; /* CMASK_PITCH */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
@@ -382,2 +383,6 @@ struct r300_texture_desc {
unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+
+ /* CMASK info for AA buffers (no mipmapping). */
+ unsigned cmask_dwords;
+ unsigned cmask_stride_in_pixels;
};
@@ -538,2 +543,4 @@ struct r300_context {
struct r300_atom zmask_clear;
+ /* cmask clear */
+ struct r300_atom cmask_clear;
/* Occlusion query. */
@@ -618,2 +625,9 @@ struct r300_context {
+ /* CMASK state. */
+ boolean cmask_access;
+ boolean cmask_in_use;
+ uint32_t color_clear_value; /* RGBA8 or RGBA1010102 */
+ uint32_t color_clear_value_ar; /* RGBA16F */
+ uint32_t color_clear_value_gb; /* RGBA16F */
+
/* Compiler state. */
@@ -724,3 +738,4 @@ enum r300_fb_state_change {
R300_CHANGED_HYPERZ_FLAG,
- R300_CHANGED_MULTIWRITE
+ R300_CHANGED_MULTIWRITE,
+ R300_CHANGED_CMASK_ENABLE,
};
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 5ec2cf9e4ae..ff1f16d913a 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -52,2 +52,3 @@ static const struct debug_named_value debug_options[] = {
{ "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
+ { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index cb6c46e5e6c..9ea084fac2d 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -401,4 +401,2 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
- /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
- * what we usually want. */
if (r300->screen->caps.is_r500) {
@@ -406,2 +404,3 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
}
+ /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */
if (fb->nr_cbufs && r300->fb_multiwrite) {
@@ -409,2 +408,6 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
}
+ if (r300->cmask_in_use) {
+ rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE |
+ R300_RB3D_CCTL_CMASK_ENABLE;
+ }
@@ -421,2 +424,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_RELOC(surf);
+
+ if (r300->cmask_in_use && i == 0) {
+ OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0);
+ OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask);
+ OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value);
+ }
}
@@ -1242,2 +1251,26 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
+void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_resource *tex;
+ CS_LOCALS(r300);
+
+ tex = r300_resource(fb->cbufs[0]->texture);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2);
+ OUT_CS(0);
+ OUT_CS(tex->tex.cmask_dwords); /* CMASK size in dwords */
+ OUT_CS(0);
+ END_CS;
+
+ /* Mark the CMASK as in use and flag the framebuffer state dirty. */
+ r300->cmask_in_use = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE);
+}
+
void r300_emit_ztop_state(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index a58ab857f56..eaa0a6c4ac0 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -119,2 +119,3 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
+void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 5e1d8101910..8342ef532d3 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2392,3 +2392,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#define RB3D_COLOR_CLEAR_VALUE 0x4e14
+#define R300_RB3D_COLOR_CLEAR_VALUE 0x4E14
+/* For FP16 AA. */
+#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46C0
+#define R500_RB3D_COLOR_CLEAR_VALUE_GB 0x46C4
@@ -2487,2 +2490,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+#define R300_RB3D_CMASK_OFFSET0 0x4E54
+#define R300_RB3D_CMASK_OFFSET1 0x4E58
+#define R300_RB3D_CMASK_OFFSET2 0x4E5C
+#define R300_RB3D_CMASK_OFFSET3 0x4E60
+#define R300_RB3D_CMASK_PITCH0 0x4E64
+#define R300_RB3D_CMASK_PITCH1 0x4E68
+#define R300_RB3D_CMASK_PITCH2 0x4E6C
+#define R300_RB3D_CMASK_PITCH3 0x4E70
+#define R300_RB3D_CMASK_WRINDEX 0x4E74
+#define R300_RB3D_CMASK_DWORD 0x4E78
+#define R300_RB3D_CMASK_RDINDEX 0x4E7C
+
/* Resolve buffer destination address. The cache must be empty before changing
@@ -3506,2 +3521,3 @@ enum {
#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+#define R300_PACKET3_3D_CLEAR_CMASK 0x00003800
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 3af5774ea51..d0f00700f81 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -524,2 +524,4 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
+ pipe_mutex_destroy(r300screen->cmask_mutex);
+
if (rws)
@@ -614,2 +616,3 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
util_format_s3tc_init();
+ pipe_mutex_init(r300screen->cmask_mutex);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index d2bed8d528e..e129cee57c7 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -30,2 +30,3 @@
#include "util/u_slab.h"
+#include "os/os_thread.h"
#include <stdio.h>
@@ -44,2 +45,6 @@ struct r300_screen {
unsigned debug;
+
+ /* The MSAA texture with CMASK access. */
+ struct pipe_resource *cmask_resource;
+ pipe_mutex cmask_mutex;
};
@@ -94,2 +99,3 @@ radeon_winsys(struct pipe_screen *screen) {
#define DBG_NO_HIZ (1 << 22)
+#define DBG_NO_CMASK (1 << 23)
/* Statistics. */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 4a5a5a89b84..fa256aa2c55 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -827,2 +827,6 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
+ if (r300->cmask_in_use) {
+ r300->fb_state.size += 6;
+ }
+
/* The size of the rest of atoms stays the same. */
@@ -902,2 +906,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
+ /* Set whether CMASK can be used. */
+ r300->cmask_in_use =
+ state->nr_cbufs == 1 &&
+ r300->screen->cmask_resource == state->cbufs[0]->texture;
+
/* Need to reset clamping or colormask. */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 4be6d5067d2..6816fd01ab3 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -908,2 +908,3 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf)
r300_translate_colormask_swizzle(surf->base.format);
+ surf->pitch_cmask = tex->tex.cmask_stride_in_pixels;
}
@@ -914,4 +915,12 @@ static void r300_texture_destroy(struct pipe_screen *screen,
{
+ struct r300_screen *rscreen = r300_screen(screen);
struct r300_resource* tex = (struct r300_resource*)texture;
+ if (tex->tex.cmask_dwords) {
+ pipe_mutex_lock(rscreen->cmask_mutex);
+ if (texture == rscreen->cmask_resource) {
+ rscreen->cmask_resource = NULL;
+ }
+ pipe_mutex_unlock(rscreen->cmask_mutex);
+ }
pb_reference(&tex->buf, NULL);
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 8928f73f7cf..46f07e2c522 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -411,2 +411,44 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen,
+static void r300_setup_cmask_properties(struct r300_screen *screen,
+ struct r300_resource *tex)
+{
+ static unsigned cmask_align_x[4] = {16, 32, 48, 32}; /* indexed by pipes-1 */
+ static unsigned cmask_align_y[4] = {16, 16, 16, 32}; /* indexed by pipes-1 */
+ unsigned pipes, stride, cmask_num_dw;
+
+ /* We need an AA colorbuffer, no mipmaps. */
+ if (tex->b.b.nr_samples <= 1 ||
+ tex->b.b.last_level > 0 ||
+ util_format_is_depth_or_stencil(tex->b.b.format)) {
+ return;
+ }
+
+ if (tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
+ return; /* no CMASK is set up for RGBA16F */
+ }
+
+ if (SCREEN_DBG_ON(screen, DBG_NO_CMASK)) {
+ return; /* disabled by the "nocmask" debug option */
+ }
+
+ /* CMASK is part of raster pipes. The number of Z pipes doesn't matter. */
+ pipes = screen->info.r300_num_gb_pipes; /* NOTE(review): used as pipes-1 index into 4-entry tables; assumes 1..4 - confirm */
+
+ stride = r300_stride_to_width(tex->b.b.format,
+ tex->tex.stride_in_bytes[0]);
+ stride = align(stride, 16);
+
+ /* Get the CMASK size in dwords. */
+ cmask_num_dw = r300_pixels_to_dwords(stride, tex->b.b.height0,
+ cmask_align_x[pipes-1],
+ cmask_align_y[pipes-1]);
+
+ /* Check the CMASK size against the CMASK memory limit. */
+ if (cmask_num_dw <= PIPE_CMASK_SIZE * pipes) {
+ tex->tex.cmask_dwords = cmask_num_dw;
+ tex->tex.cmask_stride_in_pixels =
+ util_align_npot(stride, cmask_align_x[pipes-1]);
+ }
+}
+
static void r300_setup_tiling(struct r300_screen *screen,
@@ -534,2 +576,3 @@ void r300_texture_desc_init(struct r300_screen *rscreen,
r300_setup_hyperz_properties(rscreen, tex);
+ r300_setup_cmask_properties(rscreen, tex);