summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com>, 2009-11-25 10:33:17 +1000
committer: Dave Airlie <airlied@redhat.com>, 2009-11-25 10:35:37 +1000
commit: 3a460a14b9603159f10d89da27b559c36a184e27 (patch)
tree: cc53eaab10c9bf5f532bd77e9ac5650bb3f0422c
parent: 797a3f0c71c94477eec565ea2c95553c6f66d9fd (diff)
r600: refactor code to help future acceleration speedups.
This changes the vertex buffer index to be a byte offset, records the start of the vb for each operation, and uses that start offset to set the operation up.

This still flushes after each operation to make sure we have no regressions in the non-kms/kms cases.

Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- src/r600_exa.c 99
-rw-r--r-- src/r600_textured_videofuncs.c 18
-rw-r--r-- src/r6xx_accel.c 4
-rw-r--r-- src/radeon.h 3
-rw-r--r-- src/radeon_kms.c 6
5 files changed, 83 insertions, 47 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 4c63378a..331711c8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -359,6 +359,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
ErrorF("PM: 0x%08x\n", pm);
#endif
+ accel_state->vb_start_op = accel_state->vb_offset;
return TRUE;
}
@@ -371,12 +372,14 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
struct radeon_accel_state *accel_state = info->accel_state;
float *vb;
- if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) {
+ if ((accel_state->vb_offset + (3 * 8)) > accel_state->vb_total) {
R600DoneSolid(pPix);
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r600_cp_start(pScrn);
}
- vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8);
+ vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset);
vb[0] = (float)x1;
vb[1] = (float)y1;
@@ -387,7 +390,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
vb[4] = (float)x2;
vb[5] = (float)y2;
- accel_state->vb_index += 3;
+ accel_state->vb_offset += (3*8);
}
@@ -403,13 +406,13 @@ R600DoneSolid(PixmapPtr pPix)
CLEAR (draw_conf);
CLEAR (vtx_res);
- if (accel_state->vb_index == 0) {
+ if (accel_state->vb_offset == 0) {
R600IBDiscard(pScrn, accel_state->ib);
r600_vb_discard(pScrn);
return;
}
- accel_state->vb_size = accel_state->vb_index * 8;
+ accel_state->vb_size = accel_state->vb_offset;
/* flush vertex cache */
if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
@@ -426,11 +429,12 @@ R600DoneSolid(PixmapPtr pPix)
accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0);
/* Vertex buffer setup */
+ accel_state->vb_size -= accel_state->vb_start_op;
vtx_res.id = SQ_VTX_RESOURCE_vs;
vtx_res.vtx_size_dw = 8 / 4;
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr;
+ vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
vtx_res.bo = accel_state->vb_bo;
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
@@ -450,6 +454,8 @@ R600DoneSolid(PixmapPtr pPix)
accel_state->dst_size, accel_state->dst_mc_addr,
accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ accel_state->vb_start_op = 0;
+
R600CPFlushIndirect(pScrn, accel_state->ib);
}
@@ -651,6 +657,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
SEL_CENTROID_bit));
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
END_BATCH();
+
+ accel_state->vb_start_op = accel_state->vb_offset;
}
static void
@@ -664,13 +672,13 @@ R600DoCopy(ScrnInfoPtr pScrn)
CLEAR (draw_conf);
CLEAR (vtx_res);
- if (accel_state->vb_index == 0) {
+ if (accel_state->vb_offset == 0) {
R600IBDiscard(pScrn, accel_state->ib);
r600_vb_discard(pScrn);
return;
}
- accel_state->vb_size = accel_state->vb_index * 16;
+ accel_state->vb_size = accel_state->vb_offset;
/* flush vertex cache */
if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
@@ -687,11 +695,13 @@ R600DoCopy(ScrnInfoPtr pScrn)
accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0);
/* Vertex buffer setup */
+ accel_state->vb_size -= accel_state->vb_start_op;
+
vtx_res.id = SQ_VTX_RESOURCE_vs;
vtx_res.vtx_size_dw = 16 / 4;
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr;
+ vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
vtx_res.bo = accel_state->vb_bo;
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
@@ -710,6 +720,7 @@ R600DoCopy(ScrnInfoPtr pScrn)
accel_state->dst_size, accel_state->dst_mc_addr,
accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ accel_state->vb_start_op = 0;
R600CPFlushIndirect(pScrn, accel_state->ib);
}
@@ -723,12 +734,14 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
struct radeon_accel_state *accel_state = info->accel_state;
float *vb;
- if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) {
+ if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) {
R600DoCopy(pScrn);
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r600_cp_start(pScrn);
}
- vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16);
+ vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -745,7 +758,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
vb[10] = (float)(srcX + w);
vb[11] = (float)(srcY + h);
- accel_state->vb_index += 3;
+ accel_state->vb_offset += (3 * 16);
}
static Bool
@@ -1888,6 +1901,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
END_BATCH();
+ accel_state->vb_start_op = accel_state->vb_offset;
+
return TRUE;
}
@@ -1906,12 +1921,14 @@ static void R600Composite(PixmapPtr pDst,
srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
if (accel_state->msk_pic) {
- if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) {
+ if ((accel_state->vb_offset + (3 * 24)) > accel_state->vb_total) {
R600DoneComposite(pDst);
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r600_cp_start(pScrn);
}
- vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24);
+ vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -1934,13 +1951,16 @@ static void R600Composite(PixmapPtr pDst,
vb[16] = (float)(maskX + w);
vb[17] = (float)(maskY + h);
+ accel_state->vb_offset += 3 * 24;
} else {
- if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) {
+ if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) {
R600DoneComposite(pDst);
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r600_cp_start(pScrn);
}
- vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16);
+ vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -1956,9 +1976,10 @@ static void R600Composite(PixmapPtr pDst,
vb[9] = (float)(dstY + h);
vb[10] = (float)(srcX + w);
vb[11] = (float)(srcY + h);
+ accel_state->vb_offset += 3 * 16;
+
}
- accel_state->vb_index += 3;
}
@@ -1973,30 +1994,13 @@ static void R600DoneComposite(PixmapPtr pDst)
CLEAR (draw_conf);
CLEAR (vtx_res);
- if (accel_state->vb_index == 0) {
+ if (accel_state->vb_offset == 0) {
R600IBDiscard(pScrn, accel_state->ib);
r600_vb_discard(pScrn);
return;
}
- /* Vertex buffer setup */
- if (accel_state->msk_pic) {
- accel_state->vb_size = accel_state->vb_index * 24;
- vtx_res.id = SQ_VTX_RESOURCE_vs;
- vtx_res.vtx_size_dw = 24 / 4;
- vtx_res.vtx_num_entries = accel_state->vb_size / 4;
- vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr;
- vtx_res.bo = accel_state->vb_bo;
- } else {
- accel_state->vb_size = accel_state->vb_index * 16;
- vtx_res.id = SQ_VTX_RESOURCE_vs;
- vtx_res.vtx_size_dw = 16 / 4;
- vtx_res.vtx_num_entries = accel_state->vb_size / 4;
- vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr;
- vtx_res.bo = accel_state->vb_bo;
- }
+ accel_state->vb_size = accel_state->vb_offset;
/* flush vertex cache */
if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
(info->ChipFamily == CHIP_FAMILY_RV620) ||
@@ -2011,6 +2015,24 @@ static void R600DoneComposite(PixmapPtr pDst)
accel_state->vb_size, accel_state->vb_mc_addr,
accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0);
+ accel_state->vb_size -= accel_state->vb_start_op;
+
+ /* Vertex buffer setup */
+ if (accel_state->msk_pic) {
+ vtx_res.id = SQ_VTX_RESOURCE_vs;
+ vtx_res.vtx_size_dw = 24 / 4;
+ vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+ vtx_res.mem_req_size = 1;
+ vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
+ vtx_res.bo = accel_state->vb_bo;
+ } else {
+ vtx_res.id = SQ_VTX_RESOURCE_vs;
+ vtx_res.vtx_size_dw = 16 / 4;
+ vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+ vtx_res.mem_req_size = 1;
+ vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
+ vtx_res.bo = accel_state->vb_bo;
+ }
set_vtx_resource(pScrn, accel_state->ib, &vtx_res);
draw_conf.prim_type = DI_PT_RECTLIST;
@@ -2027,8 +2049,8 @@ static void R600DoneComposite(PixmapPtr pDst)
accel_state->dst_size, accel_state->dst_mc_addr,
accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ accel_state->vb_start_op = 0;
R600CPFlushIndirect(pScrn, accel_state->ib);
-
}
Bool
@@ -2336,6 +2358,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
3, 0xffffffff);
R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
R600DoCopy(pScrn);
+
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r = radeon_bo_map(scratch, 0);
if (r) {
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 7598429f..7d0cfa78 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -65,13 +65,13 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn)
CLEAR (draw_conf);
CLEAR (vtx_res);
- if (accel_state->vb_index == 0) {
+ if (accel_state->vb_offset == 0) {
R600IBDiscard(pScrn, accel_state->ib);
r600_vb_discard(pScrn);
return;
}
- accel_state->vb_size = accel_state->vb_index * 16;
+ accel_state->vb_size = accel_state->vb_offset;
/* flush vertex cache */
if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
@@ -88,11 +88,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn)
accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0);
/* Vertex buffer setup */
+ accel_state->vb_size -= accel_state->vb_start_op;
vtx_res.id = SQ_VTX_RESOURCE_vs;
vtx_res.vtx_size_dw = 16 / 4;
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr;
+ vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
vtx_res.bo = accel_state->vb_bo;
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
@@ -111,6 +112,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn)
accel_state->dst_size, accel_state->dst_mc_addr,
accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ accel_state->vb_start_op = 0;
R600CPFlushIndirect(pScrn, accel_state->ib);
}
@@ -564,6 +566,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
END_BATCH();
+ accel_state->vb_start_op = accel_state->vb_offset;
+
vs_alu_consts[0] = 1.0 / pPriv->w;
vs_alu_consts[1] = 1.0 / pPriv->h;
vs_alu_consts[2] = 0.0;
@@ -595,12 +599,14 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
int dstX, dstY, dstw, dsth;
float *vb;
- if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) {
+ if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) {
R600DoneTexturedVideo(pScrn);
+ if (info->cs)
+ radeon_cs_flush_indirect(pScrn);
r600_cp_start(pScrn);
}
- vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16);
+ vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset);
dstX = pBox->x1 + dstxoff;
dstY = pBox->y1 + dstyoff;
@@ -632,7 +638,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
vb[10] = (float)(srcX + srcw);
vb[11] = (float)(srcY + srch);
- accel_state->vb_index += 3;
+ accel_state->vb_offset += 3 * 16;
pBox++;
}
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index a89bfb3f..50afaed6 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -86,6 +86,8 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
if (info->accel_state->vb_ptr) {
radeon_bo_unmap(info->accel_state->vb_bo);
info->accel_state->vb_ptr = NULL;
+ info->accel_state->vb_offset = 0;
+ info->accel_state->vb_start_op = 0;
}
if (CS_FULL(info->cs)) {
radeon_cs_flush_indirect(pScrn);
@@ -1183,8 +1185,8 @@ r600_vb_get(ScrnInfoPtr pScrn)
accel_state->vb_total = (accel_state->ib->total / 2);
accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
(accel_state->ib->total / 2));
+ accel_state->vb_offset = 0;
}
- accel_state->vb_index = 0;
return TRUE;
}
diff --git a/src/radeon.h b/src/radeon.h
index 0dbaa52a..745ee8e5 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -703,12 +703,13 @@ struct radeon_accel_state {
Bool vsync;
drmBufPtr ib;
- int vb_index;
+ int vb_offset;
uint64_t vb_mc_addr;
int vb_total;
void *vb_ptr;
uint32_t vb_size;
struct radeon_bo *vb_bo;
+ uint32_t vb_start_op;
// shader storage
ExaOffscreenArea *shaders;
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index cafc3298..b9228c11 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -80,8 +80,10 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
return;
if (info->accel_state->vb_ptr) {
- radeon_bo_unmap(info->accel_state->vb_bo);
- info->accel_state->vb_ptr = NULL;
+ radeon_bo_unmap(info->accel_state->vb_bo);
+ info->accel_state->vb_ptr = NULL;
+ info->accel_state->vb_start_op = 0;
+ info->accel_state->vb_offset = 0;
}
radeon_cs_emit(info->cs);