summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2011-04-12 15:42:06 -0700
committerKenneth Graunke <kenneth@whitecape.org>2011-04-18 15:26:34 -0700
commit42a805700039e81a9245f46f153e2cd9705cd0d7 (patch)
tree7e0f0683df5a1fa4f44fb8e9af4c8c065fdc573e
parent3d5cfcfed16c5a79bdf67027afe4ea8058b899cb (diff)
i965: Allocate the whole URB to the VS and fix calculations for Gen6.
Since we never enable the GS on Sandybridge, there's no need to allocate it any URB space. Furthermore, the previous calculation was incorrect: it neglected to multiply by nr_vs_entries, instead comparing whether twice the size of a single VS URB entry was bigger than the entire URB space. It also neglected to take into account that vs_size is in units of 128 byte blocks, while urb_size is in bytes. Despite the above problems, the calculations resulted in an acceptable programming of the URB in most cases, at least on GT2. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Eric Anholt <eric@anholt.net>
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h5
-rw-r--r--src/mesa/drivers/dri/i965/gen6_urb.c34
3 files changed, 25 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index a74ba5c90b8..230d326fa12 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -182,7 +182,6 @@ GLboolean brwCreateContext( int api,
/* WM maximum threads is number of EUs times number of threads per EU. */
if (intel->gen >= 6) {
- brw->urb.size = 1024;
if (IS_GT2(intel->intelScreen->deviceID)) {
/* This could possibly be 80, but is supposed to require
* disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
@@ -190,9 +189,13 @@ GLboolean brwCreateContext( int api,
*/
brw->wm_max_threads = 40;
brw->vs_max_threads = 60;
+ brw->urb.size = 64; /* volume 5c.5 section 5.1 */
+ brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */
} else {
brw->wm_max_threads = 40;
brw->vs_max_threads = 24;
+ brw->urb.size = 32; /* volume 5c.5 section 5.1 */
+ brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */
}
} else if (intel->gen == 5) {
brw->urb.size = 1024;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4b4dfbafb1a..1daa49abfb3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -568,6 +568,9 @@ struct brw_context
GLboolean constrained;
+ GLuint max_vs_handles; /* Maximum number of VS handles */
+ GLuint max_gs_handles; /* Maximum number of GS handles */
+
GLuint nr_vs_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
@@ -579,7 +582,7 @@ struct brw_context
* a number of 1024-bit (128-byte) rows. Should be >= 1.
*/
GLuint vs_size;
-/* GLuint gs_size; */
+ GLuint gs_size;
GLuint vs_start;
GLuint gs_start;
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index c3819f9b360..909e1bbe9ba 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -34,26 +34,25 @@
static void
prepare_urb( struct brw_context *brw )
{
- int urb_size, max_urb_entry;
- struct intel_context *intel = &brw->intel;
-
- if (IS_GT1(intel->intelScreen->deviceID)) {
- urb_size = 32 * 1024;
- max_urb_entry = 128;
- } else {
- urb_size = 64 * 1024;
- max_urb_entry = 256;
- }
-
- brw->urb.nr_vs_entries = max_urb_entry;
- brw->urb.nr_gs_entries = max_urb_entry;
+ int nr_vs_entries;
/* CACHE_NEW_VS_PROG */
brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
- if (2 * brw->urb.vs_size > urb_size)
- brw->urb.nr_vs_entries = brw->urb.nr_gs_entries =
- (urb_size ) / (2 * brw->urb.vs_size);
+ /* Calculate how many VS URB entries fit in the total URB size */
+ nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128);
+
+ if (nr_vs_entries > brw->urb.max_vs_handles)
+ nr_vs_entries = brw->urb.max_vs_handles;
+
+ /* According to volume 2a, nr_vs_entries must be a multiple of 4. */
+ brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
+
+ /* Since we currently don't support Geometry Shaders, we always put the
+ * GS unit in passthrough mode and don't allocate it any URB space.
+ */
+ brw->urb.nr_gs_entries = 0;
+ brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */
}
static void
@@ -61,6 +60,7 @@ upload_urb(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
+ assert(brw->urb.nr_vs_entries >= 24);
assert(brw->urb.nr_vs_entries % 4 == 0);
assert(brw->urb.nr_gs_entries % 4 == 0);
/* GS requirement */
@@ -70,7 +70,7 @@ upload_urb(struct brw_context *brw)
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
- OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+ OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
ADVANCE_BATCH();
}