summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marek Olšák <maraeo@gmail.com> 2010-01-18 00:15:52 +0100
committer Corbin Simpson <MostAwesomeDude@gmail.com> 2010-01-24 23:03:29 -0800
commit 112239e9a66a155d36fe2ad0ab130e6f26eff298 (patch)
tree 69fc54ff75f20ae8d4dac16e9bef16c23e25dfae
parent 9f8ec533123f07f29b084e8a46fc35c498b3a670 (diff)
r300g,radeong: finish and enable the immediate mode
Nearly 100% performance increase in glxgears.
-rw-r--r-- src/gallium/drivers/r300/r300_emit.c 22
-rw-r--r-- src/gallium/drivers/r300/r300_render.c 141
-rw-r--r-- src/gallium/drivers/r300/r300_state.c 22
-rw-r--r-- src/gallium/winsys/drm/radeon/core/radeon_buffer.c 83
-rw-r--r-- src/gallium/winsys/drm/radeon/core/radeon_buffer.h 2
-rw-r--r-- src/gallium/winsys/drm/radeon/core/radeon_drm.h 2
-rw-r--r-- src/gallium/winsys/drm/radeon/core/radeon_r300.c 8
-rw-r--r-- src/gallium/winsys/drm/radeon/core/radeon_winsys.h 6
8 files changed, 219 insertions, 67 deletions
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 36d2c64b587..badbf3715c7 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -772,22 +772,6 @@ void r300_emit_texture(struct r300_context* r300,
END_CS;
}
-static boolean r300_validate_aos(struct r300_context *r300)
-{
- struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->vertex_element;
- int i;
-
- /* Check if formats and strides are aligned to the size of DWORD. */
- for (i = 0; i < r300->vertex_element_count; i++) {
- if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
- util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
void r300_emit_aos(struct r300_context* r300, unsigned offset)
{
struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
@@ -797,12 +781,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
unsigned packet_size = (aos_count * 3 + 1) / 2;
CS_LOCALS(r300);
- /* XXX Move this checking to a more approriate place. */
- if (!r300_validate_aos(r300)) {
- /* XXX We should fallback using Draw. */
- assert(0);
- }
-
BEGIN_CS(2 + packet_size + aos_count * 2);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
OUT_CS(aos_count);
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 677031ef04e..7f095bffe7c 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -28,6 +28,7 @@
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
@@ -114,20 +115,53 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
-static void r300_emit_draw_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count)
+
+static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
- struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer;
- unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float);
- unsigned i;
- uint32_t* map;
+ struct pipe_vertex_element* velem;
+ struct pipe_vertex_buffer* vbuf;
+ unsigned vertex_element_count = r300->vertex_element_count;
+ unsigned i, v, vbi, dw, elem_offset;
+
+ /* Size of the vertex, in dwords. */
+ unsigned vertex_size = 0;
+
+ /* Offsets of the attribute, in dwords, from the start of the vertex. */
+ unsigned offset[PIPE_MAX_ATTRIBS];
+
+ /* Size of the vertex element, in dwords. */
+ unsigned size[PIPE_MAX_ATTRIBS];
+
+ /* Stride to the same attrib in the next vertex in the vertex buffer,
+ * in dwords. */
+ unsigned stride[PIPE_MAX_ATTRIBS];
+
+ /* Mapped vertex buffers. */
+ uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
+
CS_LOCALS(r300);
- map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo,
- start * vertex_size, count * vertex_size,
- PIPE_BUFFER_USAGE_CPU_READ);
+ /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
+ for (i = 0; i < vertex_element_count; i++) {
+ velem = &r300->vertex_element[i];
+ offset[i] = velem->src_offset >> 2;
+ size[i] = util_format_get_blocksize(velem->src_format) >> 2;
+ vertex_size += size[i];
+ vbi = velem->vertex_buffer_index;
+
+ /* Map the buffer. */
+ if (!map[vbi]) {
+ vbuf = &r300->vertex_buffer[vbi];
+ map[vbi] = (uint32_t*)pipe_buffer_map(r300->context.screen,
+ vbuf->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ map[vbi] += vbuf->buffer_offset >> 2;
+ stride[vbi] = vbuf->stride >> 2;
+ }
+ }
BEGIN_CS(10 + count * vertex_size);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
@@ -138,18 +172,31 @@ static void r300_emit_draw_immediate(struct r300_context *r300,
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
r300_translate_primitive(mode));
- //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size);
- for (i = 0; i < count * vertex_size; i++) {
- if (i % vertex_size == 0) {
- //debug_printf("r300: -- vert --\n");
+
+ /* Emit vertices. */
+ for (v = 0; v < count; v++) {
+ for (i = 0; i < vertex_element_count; i++) {
+ velem = &r300->vertex_element[i];
+ vbi = velem->vertex_buffer_index;
+ elem_offset = offset[i] + stride[vbi] * (v + start);
+
+ for (dw = 0; dw < size[i]; dw++) {
+ OUT_CS(map[vbi][elem_offset + dw]);
+ }
}
- //debug_printf("r300: 0x%08x\n", *map);
- OUT_CS(*map);
- map++;
}
END_CS;
- pipe_buffer_unmap(r300->context.screen, vbo);
+ /* Unmap buffers. */
+ for (i = 0; i < vertex_element_count; i++) {
+ vbi = r300->vertex_element[i].vertex_buffer_index;
+
+ if (map[vbi]) {
+ vbuf = &r300->vertex_buffer[vbi];
+ pipe_buffer_unmap(r300->context.screen, vbuf->buffer);
+ map[vbi] = 0;
+ }
+ }
}
static void r300_emit_draw_arrays(struct r300_context *r300,
@@ -222,16 +269,49 @@ static void r300_emit_draw_elements(struct r300_context *r300,
}
+static boolean r300_setup_local_vertex_buffers(struct r300_context *r300)
+{
+ struct pipe_vertex_buffer *vb;
+ boolean found_local_bo = FALSE, found_managed_bo = FALSE;
+ unsigned i;
+
+ /* See what buffers we got. */
+ for (i = 0; i < r300->vertex_element_count; i++) {
+ vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index];
+ if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) {
+ found_local_bo = TRUE;
+ } else {
+ found_managed_bo = TRUE;
+ }
+ }
+
+ /* If we found both local and managed buffers, make local buffers managed
+ * because we shouldn't use the immediate mode in case a managed buffer is
+ * present, due to performance reasons. */
+ if (found_local_bo && found_managed_bo) {
+ for (i = 0; i < r300->vertex_element_count; i++) {
+ vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index];
+ if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) {
+ r300->winsys->buffer_make_managed(r300->winsys, vb->buffer);
+ }
+ }
+ }
+
+ return !found_managed_bo;
+}
+
static boolean r300_setup_vertex_buffers(struct r300_context *r300)
{
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
struct pipe_vertex_element *velem = r300->vertex_element;
+ struct pipe_buffer *pbuf;
validate:
for (int i = 0; i < r300->vertex_element_count; i++) {
- if (!r300->winsys->add_buffer(r300->winsys,
- vbuf[velem[i].vertex_buffer_index].buffer,
- RADEON_GEM_DOMAIN_GTT, 0)) {
+ pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
+
+ if (!r300->winsys->add_buffer(r300->winsys, pbuf,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
@@ -245,6 +325,7 @@ validate:
return TRUE;
}
+
static void r300_shorten_ubyte_elts(struct r300_context* r300,
struct pipe_buffer** elts,
unsigned count)
@@ -365,15 +446,15 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300_emit_buffer_validate(r300);
- if (!r300_setup_vertex_buffers(r300)) {
- return;
- }
-
- r300_emit_dirty_state(r300);
-
- if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) {
- r300_emit_draw_immediate(r300, mode, start, count);
+ if (r300_setup_local_vertex_buffers(r300)) {
+ r300_emit_dirty_state(r300);
+ r300_emit_draw_arrays_immediate(r300, mode, start, count);
} else {
+ if (!r300_setup_vertex_buffers(r300)) {
+ return;
+ }
+
+ r300_emit_dirty_state(r300);
r300_emit_aos(r300, start);
r300_emit_draw_arrays(r300, mode, count);
}
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index e2ec0bc5bd2..641e95e7fca 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -924,6 +924,22 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
}
+static boolean r300_validate_aos(struct r300_context *r300)
+{
+ struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->vertex_element;
+ int i;
+
+ /* Check if formats and strides are aligned to the size of DWORD. */
+ for (i = 0; i < r300->vertex_element_count; i++) {
+ if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
+ util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
static void r300_set_vertex_elements(struct pipe_context* pipe,
unsigned count,
const struct pipe_vertex_element* elements)
@@ -939,6 +955,12 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
draw_flush(r300->draw);
draw_set_vertex_elements(r300->draw, count, elements);
}
+
+ if (!r300_validate_aos(r300)) {
+ /* XXX We should fallback using draw. */
+ assert(0);
+ abort();
+ }
}
static void* r300_create_vs_state(struct pipe_context* pipe,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 25e1cdcdb6d..5214b6d8bcb 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -51,6 +51,23 @@ static const char *radeon_get_name(struct pipe_winsys *ws)
return "Radeon/GEM+KMS";
}
+uint32_t radeon_domain_from_usage(unsigned usage)
+{
+ uint32_t domain = 0;
+
+ if (usage & PIPE_BUFFER_USAGE_PIXEL) {
+ domain |= RADEON_GEM_DOMAIN_VRAM;
+ }
+ if (usage & PIPE_BUFFER_USAGE_VERTEX) {
+ domain |= RADEON_GEM_DOMAIN_GTT;
+ }
+ if (usage & PIPE_BUFFER_USAGE_INDEX) {
+ domain |= RADEON_GEM_DOMAIN_GTT;
+ }
+
+ return domain;
+}
+
static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
unsigned alignment,
unsigned usage,
@@ -71,25 +88,17 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
radeon_buffer->base.usage = usage;
radeon_buffer->base.size = size;
- if (usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) {
+ if ((usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) ||
+ (usage == PIPE_BUFFER_USAGE_VERTEX && size < 512)) {
/* Don't bother allocating a BO, as it'll never get to the card. */
+ /* Also, create small vertex buffers in RAM. */
desc.alignment = alignment;
desc.usage = usage;
radeon_buffer->pb = pb_malloc_buffer_create(size, &desc);
return &radeon_buffer->base;
}
- domain = 0;
-
- if (usage & PIPE_BUFFER_USAGE_PIXEL) {
- domain |= RADEON_GEM_DOMAIN_VRAM;
- }
- if (usage & PIPE_BUFFER_USAGE_VERTEX) {
- domain |= RADEON_GEM_DOMAIN_GTT;
- }
- if (usage & PIPE_BUFFER_USAGE_INDEX) {
- domain |= RADEON_GEM_DOMAIN_GTT;
- }
+ domain = radeon_domain_from_usage(usage);
radeon_buffer->bo = radeon_bo_open(radeon_ws->priv->bom, 0, size,
alignment, domain, 0);
@@ -222,6 +231,54 @@ static void radeon_buffer_set_tiling(struct radeon_winsys *ws,
radeon_bo_set_tiling(radeon_buffer->bo, flags, pitch);
}
+static boolean radeon_buffer_is_local(struct radeon_winsys *ws,
+ struct pipe_buffer *buffer)
+{
+ struct radeon_pipe_buffer *radeon_buffer =
+ (struct radeon_pipe_buffer*)buffer;
+
+ return radeon_buffer->pb != NULL;
+}
+
+static void radeon_buffer_make_managed(struct radeon_winsys *ws,
+ struct pipe_buffer *buffer)
+{
+ struct radeon_pipe_buffer* radeon_buffer =
+ (struct radeon_pipe_buffer*)buffer;
+ uint32_t domain;
+ void *map;
+
+ if (radeon_buffer->pb) {
+ domain = radeon_domain_from_usage(buffer->usage);
+
+ /* Create a managed buffer. */
+ radeon_buffer->bo = radeon_bo_open(ws->priv->bom, 0,
+ buffer->size, buffer->alignment,
+ domain, 0);
+ if (radeon_buffer->bo == NULL) {
+ /* XXX What now? */
+ fprintf(stderr, "radeon: cannot create a buffer in function %s\n",
+ __FUNCTION__);
+ assert(0);
+ abort();
+ }
+
+ /* Move data. */
+ radeon_bo_map(radeon_buffer->bo, 1);
+ map = pb_map(radeon_buffer->pb, PIPE_BUFFER_USAGE_CPU_READ);
+
+ memcpy(radeon_buffer->bo->ptr, map, buffer->size);
+
+ pb_unmap(radeon_buffer->pb);
+ radeon_bo_unmap(radeon_buffer->bo);
+
+ /* Release the locally-created buffer. */
+ pipe_reference_init(&radeon_buffer->pb->base.reference, 0);
+ pb_destroy(radeon_buffer->pb);
+ radeon_buffer->pb = 0;
+ }
+}
+
static void radeon_fence_reference(struct pipe_winsys *ws,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
@@ -325,6 +382,8 @@ struct radeon_winsys* radeon_pipe_winsys(int fd)
radeon_ws->base.get_name = radeon_get_name;
radeon_ws->buffer_set_tiling = radeon_buffer_set_tiling;
+ radeon_ws->buffer_is_local = radeon_buffer_is_local;
+ radeon_ws->buffer_make_managed = radeon_buffer_make_managed;
return radeon_ws;
}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
index de71cb2f42d..c46abff793e 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
@@ -77,6 +77,8 @@ struct radeon_winsys_priv {
void *flush_data;
};
+uint32_t radeon_domain_from_usage(unsigned usage);
+
struct radeon_winsys* radeon_pipe_winsys(int fb);
#if 0
struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
index ddd7983824a..077388ee028 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
@@ -81,7 +81,7 @@ void radeon_destroy_drm_api(struct drm_api* api);
/* Guess at whether this chipset should use r300g.
*
* I believe that this check is valid, but I haven't been exhaustive. */
-static boolean is_r3xx(int pciid)
+static INLINE boolean is_r3xx(int pciid)
{
return (pciid > 0x3150) && (pciid < 0x796f);
}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index 0253bc2527e..d759beaba13 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -81,9 +81,13 @@ static void radeon_write_cs_reloc(struct radeon_winsys* winsys,
uint32_t flags)
{
int retval = 0;
+ struct radeon_pipe_buffer* radeon_buffer =
+ (struct radeon_pipe_buffer*)pbuffer;
- retval = radeon_cs_write_reloc(winsys->priv->cs,
- ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags);
+ assert(!radeon_buffer->pb);
+
+ retval = radeon_cs_write_reloc(winsys->priv->cs, radeon_buffer->bo,
+ rd, wd, flags);
if (retval) {
debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n",
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h
index 864082b99b3..462fba844ef 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h
@@ -106,6 +106,12 @@ struct radeon_winsys {
uint32_t pitch,
boolean microtiled,
boolean macrotiled);
+
+ boolean (*buffer_is_local)(struct radeon_winsys* winsys,
+ struct pipe_buffer* buffer);
+
+ void (*buffer_make_managed)(struct radeon_winsys* winsys,
+ struct pipe_buffer* buffer);
};
#endif