summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Paul Berry <stereotype441@gmail.com> 2011-08-31 15:04:23 -0700
committer Paul Berry <stereotype441@gmail.com> 2011-09-06 11:03:49 -0700
commit 45f1d7a66666d849031ffc2b8647149e17cc13bc (patch)
tree 860631edab8c820adc87745517d02ba7a3a4a9c3 /src
parent 6489a1d5bab75589569658d374257bf23cb67a23 (diff)
i965: Write code to compute a VUE map.
Several places in the i965 code make implicit assumptions about the structure of data in the VUE (vertex URB entry). This patch adds a function, brw_compute_vue_map(), which computes the structure of the VUE explicitly. Future patches will modify the rest of the driver to use the explicitly computed map rather than rely on implicit assumptions about it.

Reviewed-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 58
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 110
2 files changed, 168 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 69821d91914..bf15fd6cfcf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -281,6 +281,59 @@ struct brw_wm_prog_data {
enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
};
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_vert_result. The values of this enum are
+ * assigned such that they don't conflict with gl_vert_result: the first
+ * enumerator is set to VERT_RESULT_MAX and the rest follow consecutively.
+ *
+ * As used by brw_compute_vue_map():
+ * - BRW_VERT_RESULT_NDC: the ndc position slot of the gen4/gen5 VUE header.
+ * - BRW_VERT_RESULT_HPOS_DUPLICATE: the 4D space position slot inside the
+ *   gen5 VUE header (VERT_RESULT_HPOS itself is placed after the header).
+ * - BRW_VERT_RESULT_CLIP0, BRW_VERT_RESULT_CLIP1: the two user clip distance
+ *   slots (always on gen5; on gen6/7 only when nr_userclip is nonzero).
+ * - BRW_VERT_RESULT_PAD: the gen5 padding slot that keeps the vertex element
+ *   data aligned.
+ * - BRW_VERT_RESULT_MAX: one past the last value; used as the size of the
+ *   brw_vue_map arrays and as the marker for an unused slot.
+ */
+typedef enum
+{
+ BRW_VERT_RESULT_NDC = VERT_RESULT_MAX,
+ BRW_VERT_RESULT_HPOS_DUPLICATE,
+ BRW_VERT_RESULT_CLIP0,
+ BRW_VERT_RESULT_CLIP1,
+ BRW_VERT_RESULT_PAD,
+ BRW_VERT_RESULT_MAX
+} brw_vert_result;
+
+
+/**
+ * Data structure recording the relationship between the gl_vert_result enum
+ * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ *
+ * The map is filled in by brw_compute_vue_map(), which resets every field
+ * before assigning slots.
+ */
+struct brw_vue_map {
+ /**
+ * Map from gl_vert_result value to VUE slot. For gl_vert_results that are
+ * not stored in a slot (because they are not written, or because
+ * additional processing is applied before storing them in the VUE), the
+ * value is -1.
+ *
+ * The array is sized by BRW_VERT_RESULT_MAX, so it is also indexed by the
+ * i965-specific brw_vert_result values; those entries are likewise -1
+ * unless the corresponding header slot is present.
+ */
+ int vert_result_to_slot[BRW_VERT_RESULT_MAX];
+
+ /**
+ * Map from VUE slot to gl_vert_result value. For slots that do not
+ * directly correspond to a gl_vert_result, the value comes from
+ * brw_vert_result.
+ *
+ * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this
+ * simplifies code that uses the value stored in slot_to_vert_result to
+ * create a bit mask). Slots are assigned consecutively from 0, so the
+ * unused entries are exactly those at index num_slots and above.
+ */
+ int slot_to_vert_result[BRW_VERT_RESULT_MAX];
+
+ /**
+ * Total number of VUE slots in use. Because slots are handed out
+ * consecutively starting at 0, this is also the index that the next
+ * assigned slot receives.
+ */
+ int num_slots;
+};
+
struct brw_sf_prog_data {
GLuint urb_read_length;
GLuint total_grf;
@@ -893,6 +946,11 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+/* brw_vs.c
+ *
+ * brw_compute_vue_map() resets vue_map and fills it in from the chip
+ * generation (intel->gen) and the outputs_written bitfield; nr_userclip and
+ * two_side_color only affect the layout on gen 6 and 7.
+ */
+void brw_compute_vue_map(struct brw_vue_map *vue_map,
+ const struct intel_context *intel, int nr_userclip,
+ bool two_side_color, GLbitfield64 outputs_written);
+
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 3fb389aa407..de19ded2fb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -40,6 +40,116 @@
#include "glsl/ralloc.h"
+/**
+ * Append \p vert_result to the end of the VUE map.
+ *
+ * The vertex result takes the next unused slot; both directions of the
+ * mapping are recorded and num_slots grows by one.
+ */
+static inline void assign_vue_slot(struct brw_vue_map *vue_map,
+                                   int vert_result)
+{
+   const int slot = vue_map->num_slots;
+
+   /* A vertex result may occupy at most one slot. */
+   assert (vue_map->vert_result_to_slot[vert_result] == -1);
+
+   vue_map->slot_to_vert_result[slot] = vert_result;
+   vue_map->vert_result_to_slot[vert_result] = slot;
+   vue_map->num_slots = slot + 1;
+}
+
+/**
+ * Compute the VUE map for vertex shader program.
+ *
+ * Resets \p vue_map, assigns the fixed VUE header slots required by the chip
+ * generation, and then gives every remaining vertex result whose bit is set
+ * in \p outputs_written the next free slot, in increasing gl_vert_result
+ * order.
+ *
+ * VERT_RESULT_PSIZ and VERT_RESULT_HPOS always receive a slot as part of the
+ * header, whether or not their bits are set in \p outputs_written.
+ *
+ * \param vue_map          map to fill in; previous contents are overwritten
+ * \param intel            only intel->gen is read, to select the header
+ *                         layout (4, 5, 6 and 7 are handled; anything else
+ *                         trips an assertion)
+ * \param nr_userclip      consulted on gen 6/7 only: when nonzero, the two
+ *                         clip distance slots are added to the header
+ * \param two_side_color   consulted on gen 6/7 only: when true, front and
+ *                         back colors that are both written get adjacent
+ *                         slots
+ * \param outputs_written  bitfield with one bit per gl_vert_result, tested
+ *                         with BITFIELD64_BIT()
+ */
+void
+brw_compute_vue_map(struct brw_vue_map *vue_map,
+                    const struct intel_context *intel, int nr_userclip,
+                    bool two_side_color, GLbitfield64 outputs_written)
+{
+   int i;
+
+   /* Start from an empty map: no vert_result has a slot, no slot is used. */
+   vue_map->num_slots = 0;
+   for (i = 0; i < BRW_VERT_RESULT_MAX; ++i) {
+      vue_map->vert_result_to_slot[i] = -1;
+      vue_map->slot_to_vert_result[i] = BRW_VERT_RESULT_MAX;
+   }
+
+   /* VUE header: format depends on chip generation and whether clipping is
+    * enabled.
+    */
+   switch (intel->gen) {
+   case 4:
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 is indices, point width, clip flags.
+       * dword 4-7 is ndc position
+       * dword 8-11 is the first vertex data.
+       */
+      assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
+      assign_vue_slot(vue_map, VERT_RESULT_HPOS);
+      break;
+   case 5:
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 of the header is indices, point width, clip flags.
+       * dword 4-7 is the ndc position
+       * dword 8-11 of the vertex header is the 4D space position
+       * dword 12-19 of the vertex header is the user clip distance.
+       * dword 20-23 is a pad so that the vertex element data is aligned
+       * dword 24-27 is the first vertex data we fill.
+       *
+       * Note: future pipeline stages expect 4D space position to be
+       * contiguous with the other vert_results, so we make dword 24-27 a
+       * duplicate copy of the 4D space position.
+       */
+      assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_NDC);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_HPOS_DUPLICATE);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
+      assign_vue_slot(vue_map, BRW_VERT_RESULT_PAD);
+      assign_vue_slot(vue_map, VERT_RESULT_HPOS);
+      break;
+   case 6:
+   case 7:
+      /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+       * dword 0-3 of the header is indices, point width, clip flags.
+       * dword 4-7 is the 4D space position
+       * dword 8-15 of the vertex header is the user clip distance if
+       * enabled.
+       * dword 8-11 or 16-19 is the first vertex element data we fill.
+       */
+      assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
+      assign_vue_slot(vue_map, VERT_RESULT_HPOS);
+      if (nr_userclip) {
+         assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP0);
+         assign_vue_slot(vue_map, BRW_VERT_RESULT_CLIP1);
+      }
+      if (two_side_color) {
+         /* front and back colors need to be consecutive */
+         if ((outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+             (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+            assert(outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+            assert(outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+            assign_vue_slot(vue_map, VERT_RESULT_COL0);
+            assign_vue_slot(vue_map, VERT_RESULT_BFC0);
+            assign_vue_slot(vue_map, VERT_RESULT_COL1);
+            assign_vue_slot(vue_map, VERT_RESULT_BFC1);
+         } else if ((outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+                    (outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) {
+            assign_vue_slot(vue_map, VERT_RESULT_COL0);
+            assign_vue_slot(vue_map, VERT_RESULT_BFC0);
+         }
+      }
+      break;
+   default:
+      assert (!"VUE map not known for this chip generation");
+      break;
+   }
+
+   /* The hardware doesn't care about the rest of the vertex outputs, so just
+    * assign them contiguously. Don't reassign outputs that already have a
+    * slot.
+    *
+    * The function-scope counter is reused here; declaring a second "i" in
+    * the loop header would shadow it.
+    */
+   for (i = 0; i < VERT_RESULT_MAX; ++i) {
+      if ((outputs_written & BITFIELD64_BIT(i)) &&
+          vue_map->vert_result_to_slot[i] == -1) {
+         assign_vue_slot(vue_map, i);
+      }
+   }
+}
+
static bool
do_vs_prog(struct brw_context *brw,
struct gl_shader_program *prog,