summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2015-09-30 16:11:44 -0700
committerKenneth Graunke <kenneth@whitecape.org>2015-11-04 10:18:56 -0800
commitb672ca12894fc91f441191637ed801730269e2a9 (patch)
treed58e4fb65316989684e029b7fdc09c3b6ca95d14
parent95a5ebff7ed8928e9c8e7701d60e431850480161 (diff)
i965: Implement tessellation shaders.
Written by Chris Forbes, Fabian Bieler, and Kenneth Graunke.
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources4
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h53
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h21
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h33
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_ds.c228
-rw-r--r--src/mesa/drivers/dri/i965/brw_ds.h42
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_hs.c241
-rw-r--r--src/mesa/drivers/dri/i965/brw_hs.h42
-rw-r--r--src/mesa/drivers/dri/i965/brw_link.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c125
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp29
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp13
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.cpp295
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.h79
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp305
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.cpp426
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.h113
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp52
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c1
-rw-r--r--src/mesa/drivers/dri/i965/gen7_hs_state.c29
-rw-r--r--src/mesa/drivers/dri/i965/gen7_te_state.c37
-rw-r--r--src/mesa/drivers/dri/i965/gen7_urb.c1
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ds_state.c74
-rw-r--r--src/mesa/drivers/dri/i965/gen8_hs_state.c91
39 files changed, 2335 insertions, 82 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index abacb996cb..2fd87395bf 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -62,10 +62,12 @@ i965_compiler_FILES = \
brw_vec4.cpp \
brw_vec4_cse.cpp \
brw_vec4_dead_code_eliminate.cpp \
+ brw_vec4_ds_visitor.cpp \
brw_vec4_generator.cpp \
brw_vec4_gs_visitor.cpp \
brw_vec4_gs_visitor.h \
brw_vec4.h \
+ brw_vec4_hs_visitor.cpp \
brw_vec4_live_variables.cpp \
brw_vec4_live_variables.h \
brw_vec4_nir.cpp \
@@ -114,6 +116,7 @@ i965_FILES = \
brw_draw.c \
brw_draw.h \
brw_draw_upload.c \
+ brw_ds.c \
brw_ds_surface_state.c \
brw_ff_gs.c \
brw_ff_gs_emit.c \
@@ -122,6 +125,7 @@ i965_FILES = \
brw_gs.h \
brw_gs_state.c \
brw_gs_surface_state.c \
+ brw_hs.c \
brw_hs_surface_state.c \
brw_link.cpp \
brw_meta_fast_clear.c \
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index c547ad50b0..406140b0b7 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -184,6 +184,24 @@ struct brw_vs_prog_key {
struct brw_sampler_prog_key_data tex;
};
+/** The program key for Hull (Tessellation Control) Shaders. */
+struct brw_hs_prog_key
+{
+ /** Identifies the program being compiled; filled from the brw program's id. */
+ unsigned program_string_id;
+
+ /**
+  * Primitive mode of the domain shader this hull shader is paired with.
+  * At upload time it is copied from the bound DS program's PrimitiveMode;
+  * at precompile time a guess is used instead.
+  */
+ GLenum ds_primitive_mode;
+
+ /** Sampler key data, filled by brw_populate_sampler_prog_key_data(). */
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Domain (Tessellation Evaluation) Shaders. */
+struct brw_ds_prog_key
+{
+ /** Identifies the program being compiled; filled from the brw program's id. */
+ unsigned program_string_id;
+
+ /** Sampler key data, filled by brw_populate_sampler_prog_key_data(). */
+ struct brw_sampler_prog_key_data tex;
+};
+
/** The program key for Geometry Shaders. */
struct brw_gs_prog_key
{
@@ -543,6 +561,25 @@ struct brw_hs_prog_data
struct brw_ds_prog_data
{
struct brw_vue_prog_data base;
+
+ /* NOTE(review): these anonymous enums introduce very generic enumerator
+  * names (integer, point, line, quad, tri, ...) at file scope in C; they
+  * may collide with other identifiers -- consider prefixing them.
+  */
+
+ /** Tessellation spacing; derived from the program's Spacing qualifier. */
+ enum {
+ integer,
+ odd_fractional,
+ even_fractional
+ } partitioning;
+
+ /**
+  * Topology produced by the tessellator; derived from the program's
+  * PointMode, PrimitiveMode and VertexOrder.  The triangle winding is
+  * stored flipped relative to the GL vertex order.
+  */
+ enum {
+ point,
+ line,
+ tri_cw,
+ tri_ccw
+ } output_topology;
+
+ /** Tessellation domain; derived from the program's PrimitiveMode. */
+ enum {
+ quad,
+ tri,
+ isoline
+ } domain;
};
struct brw_gs_prog_data
@@ -631,6 +668,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
char **error_str);
/**
+ * Compile a tessellation evaluation (domain) shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_ds(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_ds_prog_key *key,
+ struct brw_ds_prog_data *prog_data,
+ const struct nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
* Compile a vertex shader.
*
* Returns the final assembly and the program's size.
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0f72ce9d1a..f090851713 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -563,6 +563,8 @@ struct brw_tracked_state {
enum shader_time_shader_type {
ST_NONE,
ST_VS,
+ ST_HS,
+ ST_DS,
ST_GS,
ST_FS8,
ST_FS16,
@@ -1089,6 +1091,13 @@ struct brw_context
* pipeline enabled the HS; false otherwise.
*/
bool enabled;
+
+ /**
+ * A reference to the upload buffer section that contains gl_PatchVerticesIn
+ * (which we have to upload as an extra constant).
+ */
+ drm_intel_bo *patch_vertices_in_bo;
+ uint32_t patch_vertices_in_offset;
} hs;
struct {
@@ -1682,6 +1691,18 @@ brw_vertex_program_const(const struct gl_vertex_program *p)
return (const struct brw_vertex_program *) p;
}
+/**
+ * Reinterpret a core gl_tess_ctrl_program pointer as the driver's
+ * brw_tess_ctrl_program.  This is a plain pointer cast with no checking;
+ * presumably it relies on the gl struct being the first member of the brw
+ * struct -- TODO confirm against the struct definition.
+ */
+static inline struct brw_tess_ctrl_program *
+brw_tess_ctrl_program(struct gl_tess_ctrl_program *p)
+{
+ return (struct brw_tess_ctrl_program *) p;
+}
+
+/**
+ * Reinterpret a core gl_tess_eval_program pointer as the driver's
+ * brw_tess_eval_program.  This is a plain pointer cast with no checking;
+ * presumably it relies on the gl struct being the first member of the brw
+ * struct -- TODO confirm against the struct definition.
+ */
+static inline struct brw_tess_eval_program *
+brw_tess_eval_program(struct gl_tess_eval_program *p)
+{
+ return (struct brw_tess_eval_program *) p;
+}
+
static inline struct brw_geometry_program *
brw_geometry_program(struct gl_geometry_program *p)
{
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e8be55dfa2..44c9609dc6 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1266,6 +1266,19 @@ enum opcode {
* Calculate the high 32-bits of a 32x32 multiply.
*/
SHADER_OPCODE_MULH,
+
+ HS_OPCODE_GET_INSTANCE_ID,
+ HS_OPCODE_URB_WRITE,
+ VEC4_OPCODE_URB_READ,
+ HS_OPCODE_SET_INPUT_URB_OFFSETS,
+ HS_OPCODE_SET_OUTPUT_URB_OFFSETS,
+ DS_OPCODE_CREATE_INPUT_READ_HEADER,
+ DS_OPCODE_ADD_INDIRECT_URB_OFFSET,
+
+ DS_OPCODE_GET_PRIMITIVE_ID,
+ HS_OPCODE_GET_PRIMITIVE_ID,
+
+ HS_OPCODE_CREATE_BARRIER_HEADER,
};
enum brw_urb_write_flags {
@@ -1891,8 +1904,13 @@ enum brw_message_target {
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
+ *
+ * Identical for HS and DS (and VS)
*/
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64)
+
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
*/
@@ -2034,6 +2052,21 @@ enum brw_message_target {
# define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
# define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* HS Thread Payload
+ */
+/* R0 */
+# define GEN7_HS_PAYLOAD_INSTANCE_NUMBER_MASK INTEL_MASK(22, 16)
+# define GEN7_HS_PAYLOAD_INSTANCE_NUMBER_WIDTH (22 - 16 + 1)
+# define GEN7_HS_PAYLOAD_INSTANCE_NUMBER_SHIFT 16
+
+# define GEN7_HS_PAYLOAD_SEMAPHORE_HANDLE_MASK INTEL_MASK(11, 0)
+
+# define HSW_HS_PAYLOAD_INSTANCE_NUMBER_MASK INTEL_MASK(23, 17)
+# define HSW_HS_PAYLOAD_INSTANCE_NUMBER_WIDTH (23 - 17 + 1)
+# define HSW_HS_PAYLOAD_INSTANCE_NUMBER_SHIFT 17
+
+# define HSW_HS_PAYLOAD_SEMAPHORE_HANDLE_MASK INTEL_MASK(12, 0)
+
#define _3DSTATE_TE 0x781C /* GEN7+ */
/* DW1 */
# define GEN7_TE_PARTITIONING_SHIFT 12
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a0a3cebf0c..cbf9954bbc 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -145,14 +145,21 @@ gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
- if (prim->mode == GL_PATCHES)
+ if (prim->mode == GL_PATCHES) {
hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
- else
+ brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE | BRW_NEW_HS_PROG_DATA;
+ } else {
hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+ }
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
+ if (prim->mode == GL_PATCHES) {
+ intel_upload_data(brw, &ctx->TessCtrlProgram.patch_vertices, 4, 4,
+ &brw->hs.patch_vertices_in_bo,
+ &brw->hs.patch_vertices_in_offset);
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_ds.c b/src/mesa/drivers/dri/i965/brw_ds.c
new file mode 100644
index 0000000000..299a17bbd4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ds.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_ds.c
+ *
+ * State atom for client-programmable tessellation evaluation shaders, and support code.
+ */
+
+#include "brw_context.h"
+#include "brw_ds.h"
+#include "brw_nir.h"
+#include "brw_state.h"
+#include "brw_vec4_ds_visitor.h"
+
+
+/**
+ * Compile a tessellation evaluation (domain) shader and upload it to the
+ * program cache.
+ *
+ * Translates the program's layout qualifiers (spacing, primitive mode,
+ * point mode / vertex order) into brw_ds_prog_data, allocates the uniform
+ * parameter arrays, and calls brw_compile_ds().  On success the assembly
+ * and prog_data are stored with brw_upload_cache(), which also updates the
+ * DS stage's prog_offset and brw->ds.prog_data.
+ *
+ * \param brw          driver context
+ * \param shader_prog  linked GLSL program containing the TES
+ * \param dp           the tessellation evaluation program to compile
+ * \param key          program key to compile for and to store in the cache
+ *
+ * \return true on success, false if compilation failed.
+ */
+static bool
+brw_codegen_ds_prog(struct brw_context *brw,
+                    struct gl_shader_program *shader_prog,
+                    struct brw_tess_eval_program *dp,
+                    struct brw_ds_prog_key *key)
+{
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+   struct brw_stage_state *stage_state = &brw->ds.base;
+   nir_shader *nir = dp->program.Base.nir;
+   struct brw_ds_prog_data prog_data;
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo,
+                                           shader_prog, &dp->program.Base,
+                                           &prog_data.base.base, 0);
+
+   switch (dp->program.Spacing) {
+   case GL_EQUAL:
+      prog_data.partitioning = integer;
+      break;
+   case GL_FRACTIONAL_ODD:
+      prog_data.partitioning = odd_fractional;
+      break;
+   case GL_FRACTIONAL_EVEN:
+      prog_data.partitioning = even_fractional;
+      break;
+   default:
+      unreachable("invalid domain shader spacing");
+   }
+
+   switch (dp->program.PrimitiveMode) {
+   case GL_QUADS:
+      prog_data.domain = quad;
+      break;
+   case GL_TRIANGLES:
+      prog_data.domain = tri;
+      break;
+   case GL_ISOLINES:
+      prog_data.domain = isoline;
+      break;
+   default:
+      unreachable("invalid domain shader primitive mode");
+   }
+
+   if (dp->program.PointMode) {
+      prog_data.output_topology = point;
+   } else if (dp->program.PrimitiveMode == GL_ISOLINES) {
+      prog_data.output_topology = line;
+   } else {
+      /* Hardware winding order is backwards from OpenGL */
+      switch (dp->program.VertexOrder) {
+      case GL_CCW:
+         prog_data.output_topology = tri_cw;
+         break;
+      case GL_CW:
+         prog_data.output_topology = tri_ccw;
+         break;
+      default:
+         unreachable("invalid domain shader vertex order");
+      }
+   }
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    *
+    * Note: param_count needs to be num_uniform_components * 4, since we add
+    * padding around uniform values below vec4 size, so the worst case is that
+    * every uniform is a float which gets padded to the size of a vec4.
+    */
+   struct gl_shader *ds = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+   int param_count = nir->num_uniforms * 4;
+
+   prog_data.base.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, ds->NumImages);
+   prog_data.base.base.nr_params = param_count;
+   prog_data.base.base.nr_image_params = ds->NumImages;
+
+   brw_nir_setup_glsl_uniforms(nir, shader_prog, &dp->program.Base,
+                               &prog_data.base.base, false);
+
+   int st_index = -1;
+   if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
+      st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_DS);
+
+   void *mem_ctx = ralloc_context(NULL);
+   unsigned program_size;
+   char *error_str = NULL;
+   const unsigned *program =
+      brw_compile_ds(compiler, brw, mem_ctx, key, &prog_data, nir,
+                     shader_prog, st_index, &program_size, &error_str);
+   if (program == NULL) {
+      /* The parameter arrays were allocated without a ralloc parent and are
+       * only handed over to the state cache by brw_upload_cache().  Nothing
+       * was uploaded, so release them here instead of leaking them.
+       *
+       * NOTE(review): error_str is never reported to the user; consider
+       * appending it to the program's info log.
+       */
+      ralloc_free(prog_data.base.base.param);
+      ralloc_free(prog_data.base.base.pull_param);
+      ralloc_free(prog_data.base.base.image_param);
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   /* Scratch space is used for register spilling */
+   if (prog_data.base.base.total_scratch) {
+      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
+                         prog_data.base.base.total_scratch *
+                         brw->max_ds_threads);
+   }
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_DS_PROG,
+                    key, sizeof(*key),
+                    program, program_size,
+                    &prog_data, sizeof(prog_data),
+                    &stage_state->prog_offset, &brw->ds.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+/**
+ * State upload hook for the tessellation evaluation (domain) shader.
+ *
+ * Does nothing unless _NEW_TEXTURE or BRW_NEW_TESS_EVAL_PROGRAM is dirty.
+ * With no TES bound, the DS prog_data pointers are cleared.  Otherwise a
+ * program key is built and looked up in the program cache; if the lookup
+ * fails the shader is compiled with brw_codegen_ds_prog().
+ */
+void
+brw_upload_ds_prog(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
+ struct brw_stage_state *stage_state = &brw->ds.base;
+ struct brw_ds_prog_key key;
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ struct brw_tess_eval_program *dp =
+ (struct brw_tess_eval_program *) brw->tess_eval_program;
+
+ if (!brw_state_dirty(brw,
+ _NEW_TEXTURE,
+ BRW_NEW_TESS_EVAL_PROGRAM))
+ return;
+
+ if (dp == NULL) {
+ /* Other state atoms had better not try to access prog_data, since
+ * there's no DS program.
+ */
+ brw->ds.prog_data = NULL;
+ brw->ds.base.prog_data = NULL;
+
+ return;
+ }
+
+ struct gl_program *prog = &dp->program.Base;
+
+ /* Zero the whole key first: it is handed to the cache as a raw
+  * (pointer, size) pair, so presumably padding bytes must be deterministic.
+  */
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = dp->id;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+ &key.tex);
+
+ /* Compile only when the cache lookup fails. */
+ if (!brw_search_cache(&brw->cache, BRW_CACHE_DS_PROG,
+ &key, sizeof(key),
+ &stage_state->prog_offset, &brw->ds.prog_data)) {
+ bool success = brw_codegen_ds_prog(brw, current[MESA_SHADER_TESS_EVAL],
+ dp, &key);
+ assert(success);
+ (void)success;
+ }
+ brw->ds.base.prog_data = &brw->ds.prog_data->base.base;
+}
+
+
+/**
+ * Compile the tessellation evaluation (domain) shader ahead of its first
+ * use, with a key that contains only the program id (sampler key data is
+ * left zeroed).
+ *
+ * The DS stage's current prog_offset and prog_data are saved before the
+ * compile and restored afterwards, so the currently bound program state is
+ * left as it was.
+ *
+ * \return the result of brw_codegen_ds_prog().
+ */
+bool
+brw_ds_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_ds_prog_key key;
+ /* brw_codegen_ds_prog() overwrites these through brw_upload_cache(). */
+ uint32_t old_prog_offset = brw->ds.base.prog_offset;
+ struct brw_ds_prog_data *old_prog_data = brw->ds.prog_data;
+ bool success;
+
+ struct gl_tess_eval_program *dp = (struct gl_tess_eval_program *)prog;
+ struct brw_tess_eval_program *bdp = brw_tess_eval_program(dp);
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = bdp->id;
+
+ success = brw_codegen_ds_prog(brw, shader_prog, bdp, &key);
+
+ brw->ds.base.prog_offset = old_prog_offset;
+ brw->ds.prog_data = old_prog_data;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_ds.h b/src/mesa/drivers/dri/i965/brw_ds.h
new file mode 100644
index 0000000000..00c3cdfd03
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ds.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef BRW_DS_H
+#define BRW_DS_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct brw_context;
+struct gl_shader_program;
+
+void brw_upload_ds_prog(struct brw_context *brw);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* BRW_DS_H */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 0ac1ad9378..9673e06a56 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -206,6 +206,14 @@ void brw_set_sampler_message(struct brw_codegen *p,
unsigned simd_mode,
unsigned return_format);
+void brw_set_message_descriptor(struct brw_codegen *p,
+ brw_inst *inst,
+ enum brw_message_target sfid,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present,
+ bool end_of_thread);
+
void brw_set_dp_read_message(struct brw_codegen *p,
brw_inst *insn,
unsigned binding_table_index,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index a6fbb54291..84c9abad91 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -534,7 +534,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
* \b before filling out any message-specific data. Callers can
* choose not to fill in irrelevant bits; they will be zero.
*/
-static void
+void
brw_set_message_descriptor(struct brw_codegen *p,
brw_inst *inst,
enum brw_message_target sfid,
@@ -2875,6 +2875,7 @@ brw_untyped_atomic(struct brw_codegen *p,
brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, true),
align1);
+ brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_4);
brw_set_dp_untyped_atomic_message(
p, insn, atomic_op, response_expected);
diff --git a/src/mesa/drivers/dri/i965/brw_hs.c b/src/mesa/drivers/dri/i965/brw_hs.c
new file mode 100644
index 0000000000..30f69de951
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_hs.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_hs.c
+ *
+ * State atom for client-programmable tessellation control shaders,
+ * and support code.
+ */
+
+#include "brw_context.h"
+#include "brw_hs.h"
+#include "brw_nir.h"
+#include "brw_vec4_hs_visitor.h"
+#include "brw_state.h"
+
+/**
+ * Compile a tessellation control (hull) shader and upload it to the program
+ * cache.
+ *
+ * Sets up a brw_hs_compile for the program, allocates the uniform parameter
+ * arrays, computes the output VUE map and URB entry size, and calls
+ * brw_hs_emit().  On success the assembly and prog_data are stored with
+ * brw_upload_cache(), which also updates the HS stage's prog_offset and
+ * brw->hs.prog_data.
+ *
+ * \param brw          driver context
+ * \param shader_prog  linked GLSL program containing the TCS
+ * \param hp           the tessellation control program to compile
+ * \param key          program key to compile for and to store in the cache
+ *
+ * \return true on success; false if the outputs do not fit in a URB entry
+ *         or code generation failed.
+ */
+static bool
+brw_codegen_hs_prog(struct brw_context *brw,
+                    struct gl_shader_program *shader_prog,
+                    struct brw_tess_ctrl_program *hp,
+                    struct brw_hs_prog_key *key)
+{
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+   struct brw_stage_state *stage_state = &brw->hs.base;
+   nir_shader *nir = hp->program.Base.nir;
+   struct brw_hs_compile c;
+   memset(&c, 0, sizeof(c));
+   c.key = *key;
+   c.hp = hp;
+
+   const int patch_vertices_out = hp->program.VerticesOut;
+   c.prog_data.instances = patch_vertices_out;
+   c.prog_data.uses_barrier_function = false; // XXX: gp->program.UsesBarrier;
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    *
+    * Note: param_count needs to be num_uniform_components * 4, since we add
+    * padding around uniform values below vec4 size, so the worst case is that
+    * every uniform is a float which gets padded to the size of a vec4.
+    */
+   struct gl_shader *hs = shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+   int param_count = nir->num_uniforms * 4;
+
+   c.prog_data.base.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, hs->NumImages);
+   c.prog_data.base.base.nr_params = param_count;
+   c.prog_data.base.base.nr_image_params = hs->NumImages;
+
+   brw_nir_setup_glsl_uniforms(nir, shader_prog, &hp->program.Base,
+                               &c.prog_data.base.base, false);
+
+   brw_compute_tess_vue_map(&c.prog_data.base.vue_map,
+                            nir->info.outputs_written,
+                            nir->info.patch_outputs_written);
+
+   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
+    * That divides up as follows:
+    *
+    *     32 bytes for the patch header (tessellation factors)
+    *    480 bytes for per-patch varyings (a varying component is 4 bytes and
+    *              gl_MaxTessPatchComponents = 120)
+    *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
+    *              gl_MaxPatchVertices = 32 and
+    *              gl_MaxTessControlOutputComponents = 128)
+    *
+    *  15872 bytes left for varying packing overhead
+    */
+   const int num_per_patch_slots = c.prog_data.base.vue_map.num_per_patch_slots;
+   const int num_per_vertex_slots = c.prog_data.base.vue_map.num_per_vertex_slots;
+   unsigned output_size_bytes = 0;
+   /* Note that the patch header is counted in num_per_patch_slots. */
+   output_size_bytes += num_per_patch_slots * 16;
+   output_size_bytes += patch_vertices_out * num_per_vertex_slots * 16;
+
+   assert(output_size_bytes >= 1);
+   if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES) {
+      /* The parameter arrays have no ralloc parent and are only handed over
+       * to the state cache by brw_upload_cache(); free them on this early
+       * exit so they are not leaked.
+       */
+      ralloc_free(c.prog_data.base.base.param);
+      ralloc_free(c.prog_data.base.base.pull_param);
+      ralloc_free(c.prog_data.base.base.image_param);
+      return false;
+   }
+
+   /* URB entry sizes are stored as a multiple of 64 bytes. */
+   c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+   brw_compute_vue_map(devinfo, &c.input_vue_map,
+                       nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+                       true);
+
+   /* HS does not use the usual payload pushing from URB to GRFs,
+    * because we don't have enough registers for a full-size payload, and
+    * the hardware is broken on Haswell anyway. */
+   c.prog_data.base.urb_read_length = 0;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_HS)) {
+      brw_dump_ir("tessellation control", shader_prog, hs, NULL);
+
+      fprintf(stderr, "TCS Input ");
+      brw_print_vue_map(stderr, &c.input_vue_map);
+      fprintf(stderr, "TCS Output ");
+      brw_print_vue_map(stderr, &c.prog_data.base.vue_map);
+   }
+
+   int st_index = -1;
+   if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
+      st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_HS);
+
+   void *mem_ctx = ralloc_context(NULL);
+   unsigned program_size;
+   const unsigned *program =
+      brw_hs_emit(brw, shader_prog, &c, mem_ctx, st_index, &program_size);
+   if (program == NULL) {
+      /* Nothing was uploaded, so the state cache never took ownership of
+       * the parameter arrays; release them along with the compile context.
+       */
+      ralloc_free(c.prog_data.base.base.param);
+      ralloc_free(c.prog_data.base.base.pull_param);
+      ralloc_free(c.prog_data.base.base.image_param);
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   /* Scratch space is used for register spilling */
+   if (c.prog_data.base.base.total_scratch) {
+      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
+                         c.prog_data.base.base.total_scratch *
+                         brw->max_hs_threads);
+   }
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_HS_PROG,
+                    &c.key, sizeof(c.key),
+                    program, program_size,
+                    &c.prog_data, sizeof(c.prog_data),
+                    &stage_state->prog_offset, &brw->hs.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+/**
+ * State upload hook for the tessellation control (hull) shader.
+ *
+ * Does nothing unless _NEW_TEXTURE, BRW_NEW_TESS_CTRL_PROGRAM or
+ * BRW_NEW_TESS_EVAL_PROGRAM is dirty.  With no TCS bound, the HS prog_data
+ * pointers are cleared.  Otherwise a program key is built (including the
+ * bound TES's primitive mode) and looked up in the program cache; if the
+ * lookup fails the shader is compiled with brw_codegen_hs_prog().
+ */
+void
+brw_upload_hs_prog(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
+ struct brw_stage_state *stage_state = &brw->hs.base;
+ struct brw_hs_prog_key key;
+ /* BRW_NEW_TESS_CTRL_PROGRAM */
+ struct brw_tess_ctrl_program *hp =
+ (struct brw_tess_ctrl_program *) brw->tess_ctrl_program;
+
+ if (!brw_state_dirty(brw,
+ _NEW_TEXTURE,
+ BRW_NEW_TESS_CTRL_PROGRAM |
+ BRW_NEW_TESS_EVAL_PROGRAM))
+ return;
+
+ if (hp == NULL) {
+ /* Other state atoms had better not try to access prog_data, since
+ * there's no HS program.
+ */
+ brw->hs.prog_data = NULL;
+ brw->hs.base.prog_data = NULL;
+
+ return;
+ }
+
+ struct gl_program *prog = &hp->program.Base;
+
+ /* Zero the whole key first: it is handed to the cache as a raw
+  * (pointer, size) pair, so presumably padding bytes must be deterministic.
+  */
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = hp->id;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+ &key.tex);
+
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ /* We need to specialize our code generation for tessellation levels
+ * based on the domain the DS is expecting to tessellate.
+ */
+ struct brw_tess_eval_program *dp =
+ (struct brw_tess_eval_program *) brw->tess_eval_program;
+ /* NOTE(review): only an assert guards the dereference below, so a build
+  * with assertions disabled would dereference NULL if a TCS were ever
+  * bound without a TES -- confirm callers rule that out.
+  */
+ assert(dp);
+ key.ds_primitive_mode = dp->program.PrimitiveMode;
+
+ /* Compile only when the cache lookup fails. */
+ if (!brw_search_cache(&brw->cache, BRW_CACHE_HS_PROG,
+ &key, sizeof(key),
+ &stage_state->prog_offset, &brw->hs.prog_data)) {
+ bool success = brw_codegen_hs_prog(brw, current[MESA_SHADER_TESS_CTRL],
+ hp, &key);
+ assert(success);
+ (void)success;
+ }
+ brw->hs.base.prog_data = &brw->hs.prog_data->base.base;
+}
+
+
+/**
+ * Compile the tessellation control (hull) shader ahead of its first use.
+ *
+ * The key's ds_primitive_mode is taken from the tessellation evaluation
+ * shader linked into \p shader_prog when there is one, so the key matches
+ * the one brw_upload_hs_prog() builds when that same TES is bound.  When no
+ * TES is linked into the program, GL_TRIANGLES is used as a guess.
+ *
+ * The HS stage's current prog_offset and prog_data are saved before the
+ * compile and restored afterwards, so the currently bound program state is
+ * left as it was.
+ *
+ * \return the result of brw_codegen_hs_prog().
+ */
+bool
+brw_hs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_hs_prog_key key;
+   /* brw_codegen_hs_prog() overwrites these through brw_upload_cache(). */
+   uint32_t old_prog_offset = brw->hs.base.prog_offset;
+   struct brw_hs_prog_data *old_prog_data = brw->hs.prog_data;
+   bool success;
+
+   struct gl_tess_ctrl_program *hp = (struct gl_tess_ctrl_program *)prog;
+   struct brw_tess_ctrl_program *bhp = brw_tess_ctrl_program(hp);
+
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = bhp->id;
+
+   /* Prefer the primitive mode of the TES linked into the same program over
+    * a blind guess.
+    */
+   struct gl_shader *ds = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+   if (ds != NULL && ds->Program != NULL) {
+      struct brw_tess_eval_program *bdp =
+         brw_tess_eval_program((struct gl_tess_eval_program *) ds->Program);
+      key.ds_primitive_mode = bdp->program.PrimitiveMode;
+   } else {
+      key.ds_primitive_mode = GL_TRIANGLES;
+   }
+
+   success = brw_codegen_hs_prog(brw, shader_prog, bhp, &key);
+
+   brw->hs.base.prog_offset = old_prog_offset;
+   brw->hs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_hs.h b/src/mesa/drivers/dri/i965/brw_hs.h
new file mode 100644
index 0000000000..e2a102429e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_hs.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef BRW_HS_H
+#define BRW_HS_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct brw_context;
+struct gl_shader_program;
+
+void brw_upload_hs_prog(struct brw_context *brw);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* BRW_HS_H */
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index fc9bee43d8..392953b668 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -42,6 +42,8 @@ brw_shader_precompile(struct gl_context *ctx,
struct gl_shader_program *sh_prog)
{
struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ struct gl_shader *hs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+ struct gl_shader *ds = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
@@ -52,6 +54,12 @@ brw_shader_precompile(struct gl_context *ctx,
if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
return false;
+ if (ds && !brw_ds_precompile(ctx, sh_prog, ds->Program))
+ return false;
+
+ if (hs && !brw_hs_precompile(ctx, sh_prog, hs->Program))
+ return false;
+
if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index dece208233..d0fc18a7c7 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -55,6 +55,57 @@ remap_vs_attrs(nir_block *block, void *closure)
return true;
}
+/**
+ * Per-block callback: rewrite const_index[0] of every input-load intrinsic
+ * in \p block from a varying index to the slot given by the VUE map passed
+ * in \p closure.
+ *
+ * Always returns true.
+ */
+static bool
+remap_inputs_with_vue_map(nir_block *block, void *closure)
+{
+   const struct brw_vue_map *map = closure;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+
+         switch (load->intrinsic) {
+         case nir_intrinsic_load_input:
+         case nir_intrinsic_load_input_indirect:
+         case nir_intrinsic_load_per_vertex_input:
+         case nir_intrinsic_load_per_vertex_input_indirect: {
+            /* Every varying that is read must have a slot in the map. */
+            const int slot = map->varying_to_slot[load->const_index[0]];
+            assert(slot != -1);
+            load->const_index[0] = slot;
+            break;
+         }
+         default:
+            break;
+         }
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Per-block callback: rewrite const_index[0] of every output load/store
+ * intrinsic in \p block from a varying index to the slot given by the VUE
+ * map passed in \p closure.
+ *
+ * Always returns true.
+ */
+static bool
+remap_outputs_with_vue_map(nir_block *block, void *closure)
+{
+   const struct brw_vue_map *map = closure;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *io = nir_instr_as_intrinsic(instr);
+
+         switch (io->intrinsic) {
+         case nir_intrinsic_load_output:
+         case nir_intrinsic_load_output_indirect:
+         case nir_intrinsic_load_per_vertex_output:
+         case nir_intrinsic_load_per_vertex_output_indirect:
+         case nir_intrinsic_store_output:
+         case nir_intrinsic_store_output_indirect:
+         case nir_intrinsic_store_per_vertex_output:
+         case nir_intrinsic_store_per_vertex_output_indirect: {
+            /* Every varying that is accessed must have a slot in the map. */
+            const int slot = map->varying_to_slot[io->const_index[0]];
+            assert(slot != -1);
+            io->const_index[0] = slot;
+            break;
+         }
+         default:
+            break;
+         }
+      }
+   }
+
+   return true;
+}
+
+
static void
brw_nir_lower_inputs(const struct brw_device_info *devinfo,
nir_shader *nir, bool is_scalar)
@@ -91,8 +142,9 @@ brw_nir_lower_inputs(const struct brw_device_info *devinfo,
}
}
break;
+ case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_GEOMETRY: {
- if (!is_scalar) {
+ if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) {
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
var->data.driver_location = var->data.location;
}
@@ -110,21 +162,43 @@ brw_nir_lower_inputs(const struct brw_device_info *devinfo,
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
* written by previous stages and shows up via payload magic.
*/
- struct brw_vue_map input_vue_map;
+ struct brw_vue_map vue_map;
GLbitfield64 inputs_read =
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
- brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
- nir->info.separate_shader);
+ brw_compute_vue_map(devinfo, &vue_map, inputs_read,
+ nir->info.separate_shader || nir->stage == MESA_SHADER_TESS_CTRL);
- /* Start with the slot for the variable's base. */
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
- assert(input_vue_map.varying_to_slot[var->data.location] != -1);
- var->data.driver_location =
- input_vue_map.varying_to_slot[var->data.location];
+ var->data.driver_location = var->data.location;
}
/* Inputs are stored in vec4 slots, so use type_size_vec4(). */
nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_inputs_with_vue_map, &vue_map);
+ }
+ }
+ }
+ break;
+ }
+ case MESA_SHADER_TESS_EVAL: {
+ struct brw_vue_map vue_map;
+ brw_compute_tess_vue_map(&vue_map,
+ nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+ nir->info.patch_inputs_read);
+
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+
+ nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_inputs_with_vue_map, &vue_map);
+ }
}
break;
}
@@ -143,11 +217,13 @@ brw_nir_lower_inputs(const struct brw_device_info *devinfo,
}
static void
-brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
+brw_nir_lower_outputs(const struct brw_device_info *devinfo,
+ nir_shader *nir, bool is_scalar)
{
switch (nir->stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
+ case MESA_SHADER_TESS_EVAL:
if (is_scalar) {
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
type_size_vec4_times_4);
@@ -157,6 +233,24 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
var->data.driver_location = var->data.location;
}
break;
+ case MESA_SHADER_TESS_CTRL: {
+ struct brw_vue_map vue_map;
+ brw_compute_tess_vue_map(&vue_map, nir->info.outputs_written,
+ nir->info.patch_outputs_written);
+
+ nir_foreach_variable(var, &nir->outputs) {
+ var->data.driver_location = var->data.location;
+ }
+
+ nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
+
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_outputs_with_vue_map, &vue_map);
+ }
+ }
+ break;
+ }
case MESA_SHADER_FRAGMENT:
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
type_size_scalar);
@@ -265,8 +359,19 @@ brw_create_nir(struct brw_context *brw,
/* Get rid of split copies */
nir_optimize(nir, is_scalar);
+ if (stage == MESA_SHADER_TESS_EVAL &&
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
+ const struct gl_program *tcs =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
+ /* Work around the TCS having bonus outputs used as shared memory
+ * segments, which makes OutputsWritten not match InputsRead
+ */
+ nir->info.inputs_read = tcs->OutputsWritten;
+ nir->info.patch_inputs_read = tcs->PatchOutputsWritten;
+ }
+
brw_nir_lower_inputs(devinfo, nir, is_scalar);
- brw_nir_lower_outputs(nir, is_scalar);
+ brw_nir_lower_outputs(devinfo, nir, is_scalar);
nir_assign_var_locations(&nir->uniforms,
&nir->num_uniforms,
is_scalar ? type_size_scalar : type_size_vec4);
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 083c46a372..7d4c3604cf 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -85,6 +85,7 @@ struct brw_device_info;
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
+#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0)
static inline bool
brw_is_single_value_swizzle(unsigned swiz)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 4ea297ade4..d1589c34d2 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -25,6 +25,8 @@
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_gs.h"
+#include "brw_hs.h"
+#include "brw_ds.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_nir.h"
@@ -150,6 +152,11 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].NirOptions = nir_options;
}
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectOutput = false;
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectOutput = false;
+
return compiler;
}
@@ -546,6 +553,28 @@ brw_instruction_name(enum opcode op)
return "barrier";
case SHADER_OPCODE_MULH:
return "mulh";
+
+ case HS_OPCODE_GET_INSTANCE_ID:
+ return "hs_get_instance_id";
+ case HS_OPCODE_URB_WRITE:
+ return "hs_urb_write";
+ case VEC4_OPCODE_URB_READ:
+ return "urb_read";
+ case HS_OPCODE_SET_INPUT_URB_OFFSETS:
+ return "hs_set_input_urb_offsets";
+ case HS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ return "hs_set_output_urb_offsets";
+ case DS_OPCODE_CREATE_INPUT_READ_HEADER:
+ return "ds_create_input_read_header";
+ case DS_OPCODE_ADD_INDIRECT_URB_OFFSET:
+ return "ds_add_indirect_urb_offset";
+
+ case DS_OPCODE_GET_PRIMITIVE_ID:
+ return "ds_get_primitive_id";
+ case HS_OPCODE_GET_PRIMITIVE_ID:
+ return "hs_get_primitive_id";
+ case HS_OPCODE_CREATE_BARRIER_HEADER:
+ return "hs_create_barrier_header";
}
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 29baebf0cc..2575fcff97 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -265,6 +265,12 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
bool brw_vs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
+bool brw_hs_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog);
+bool brw_ds_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog);
bool brw_gs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 194cfdea6b..48df6b7976 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -143,6 +143,7 @@ extern const struct brw_tracked_state gen7_depthbuffer;
extern const struct brw_tracked_state gen7_clip_state;
extern const struct brw_tracked_state gen7_ds_push_constants;
extern const struct brw_tracked_state gen7_ds_state;
+extern const struct brw_tracked_state gen7_gs_push_constants;
extern const struct brw_tracked_state gen7_gs_state;
extern const struct brw_tracked_state gen7_hs_push_constants;
extern const struct brw_tracked_state gen7_hs_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index f7c0a2037d..427466ce13 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -51,6 +51,8 @@
#include "brw_wm.h"
#include "brw_gs.h"
#include "brw_cs.h"
+#include "brw_ds.h"
+#include "brw_hs.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7272eb8974..50907a550d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -37,6 +37,8 @@
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
+#include "brw_hs.h"
+#include "brw_ds.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
@@ -348,6 +350,9 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
&gen7_te_state,
&gen8_ds_state,
&gen8_vs_state,
+ &gen8_hs_state,
+ &gen7_te_state,
+ &gen8_ds_state,
&gen8_gs_state,
&gen8_sol_state,
&gen6_clip_state,
@@ -691,6 +696,11 @@ brw_upload_programs(struct brw_context *brw,
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
+ if (brw->gen >= 7) {
+ brw_upload_hs_prog(brw);
+ brw_upload_ds_prog(brw);
+ }
+
if (brw->gen < 6)
brw_upload_ff_gs_prog(brw);
else
@@ -703,6 +713,8 @@ brw_upload_programs(struct brw_context *brw,
bool old_separate = brw->vue_map_geom_out.separate;
if (brw->geometry_program)
brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+ else if (brw->tess_eval_program)
+ brw->vue_map_geom_out = brw->ds.prog_data->base.vue_map;
else
brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index eae9f3c8cb..e52114af49 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -235,6 +235,9 @@ vec4_instruction::is_send_from_grf()
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_URB_READ:
+ case HS_OPCODE_URB_WRITE:
+ case SHADER_OPCODE_BARRIER:
return true;
default:
return false;
@@ -264,7 +267,10 @@ bool
vec4_instruction::has_source_and_destination_hazard() const
{
switch (opcode) {
- /* Most opcodes in the vec4 world use MRFs. */
+ case HS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case HS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ case DS_OPCODE_ADD_INDIRECT_URB_OFFSET:
+ return true;
default:
return false;
}
@@ -284,6 +290,7 @@ vec4_instruction::regs_read(unsigned arg) const
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case HS_OPCODE_URB_WRITE:
return arg == 0 ? mlen : 1;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
@@ -361,6 +368,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
return 0;
case GS_OPCODE_FF_SYNC:
return 1;
+ case HS_OPCODE_URB_WRITE:
+ return 0;
case SHADER_OPCODE_SHADER_TIME_ADD:
return 0;
case SHADER_OPCODE_TEX:
@@ -2028,7 +2037,7 @@ vec4_visitor::run()
}
}
- opt_schedule_instructions();
+ //opt_schedule_instructions();
opt_set_dependency_control();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index fb8fbbdc93..b620ceec0b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -84,7 +84,8 @@ public:
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
- int shader_time_index);
+ int shader_time_index,
+ const struct brw_vue_map *const input_vue_map);
virtual ~vec4_visitor();
dst_reg dst_null_f()
@@ -358,9 +359,14 @@ public:
unsigned num_components = 4);
src_reg get_nir_src(nir_src src,
unsigned num_components = 4);
+ src_reg get_nir_src_imm1(nir_src src, enum brw_reg_type type);
src_reg get_nir_indirect_src(nir_intrinsic_instr *instr);
src_reg get_nir_vertex_index_src(nir_intrinsic_instr *instr);
+ void get_patch_urb_offsets(nir_intrinsic_instr *instr,
+ unsigned num_per_vertex_slots,
+ unsigned *imm_offset,
+ src_reg *indirect_offset);
virtual dst_reg *make_reg_for_system_value(int location,
const glsl_type *type) = 0;
@@ -384,6 +390,8 @@ protected:
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
+ const struct brw_vue_map *const input_vue_map;
+
private:
/**
* If true, then register allocation should fail instead of spilling.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 5a277f74c4..904800eed9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -75,6 +75,8 @@ is_expression(const vec4_instruction *const inst)
case VEC4_OPCODE_UNPACK_UNIFORM:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
+ case HS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case HS_OPCODE_SET_OUTPUT_URB_OFFSETS:
return true;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 284e0a8d0a..e1cc219388 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -45,6 +45,11 @@ can_do_writemask(const struct brw_device_info *devinfo,
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
+ case HS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case HS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ case DS_OPCODE_CREATE_INPUT_READ_HEADER:
+ case DS_OPCODE_ADD_INDIRECT_URB_OFFSET:
+ case VEC4_OPCODE_URB_READ:
return false;
default:
/* The MATH instruction on Gen6 only executes in align1 mode, which does
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.cpp
new file mode 100644
index 0000000000..92b2be965b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.cpp
@@ -0,0 +1,295 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_ds_visitor.cpp
+ *
+ * Tessellation evaluation shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_vec4_ds_visitor.h"
+
+namespace brw {
+
+/**
+ * Construct the tessellation evaluation shader visitor.
+ *
+ * All work is delegated to the vec4_visitor constructor, which receives
+ * &key->tex, &prog_data->base, the shader, memory context, shader-time
+ * index and input VUE map.  The no_spills argument is passed as false.
+ */
+vec4_ds_visitor::vec4_ds_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ const struct brw_ds_prog_key *key,
+ struct brw_ds_prog_data *prog_data,
+ struct brw_vue_map *input_vue_map,
+ const nir_shader *shader,
+ void *mem_ctx,
+ int shader_time_index)
+ : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
+ shader, mem_ctx, false, shader_time_index,
+ input_vue_map)
+{
+}
+
+
+/**
+ * Allocate a destination register of GLSL type \p type for a system value.
+ *
+ * Only SYSTEM_VALUE_TESS_COORD and SYSTEM_VALUE_PRIMITIVE_ID are expected
+ * here; any other \p location trips an assertion.  The register is
+ * allocated from mem_ctx and returned in every case.
+ */
+dst_reg *
+vec4_ds_visitor::make_reg_for_system_value(int location, const glsl_type *type)
+{
+   dst_reg *const result = new(mem_ctx) dst_reg(this, type);
+
+   const bool expected_location = location == SYSTEM_VALUE_TESS_COORD ||
+                                  location == SYSTEM_VALUE_PRIMITIVE_ID;
+   if (!expected_location)
+      assert(!"not reached");
+
+   return result;
+}
+
+/**
+ * Set up the tessellation-level system values by reading them from the URB.
+ *
+ * Handles nir_intrinsic_load_tess_level_outer and
+ * nir_intrinsic_load_tess_level_inner; every other intrinsic is forwarded
+ * to vec4_visitor::nir_setup_system_value_intrinsic().  Both cases emit a
+ * VEC4_OPCODE_URB_READ through input_read_header into a temporary and then
+ * MOV the result into the register recorded in nir_system_values[].
+ */
+void
+vec4_ds_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+ const struct brw_ds_prog_data *ds_prog_data =
+ (const struct brw_ds_prog_data *) prog_data;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_tess_level_outer: {
+ dst_reg dst(this, glsl_type::vec4_type);
+ nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
+
+ /* Outer levels are read at URB offset 1 for every domain. */
+ dst_reg temp(this, glsl_type::vec4_type);
+ vec4_instruction *read =
+ emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
+ read->offset = 1;
+ read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+ /* BRW_SWIZZLE_WZYX reverses the component order of the data read. */
+ emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
+ break;
+ }
+ case nir_intrinsic_load_tess_level_inner: {
+ dst_reg dst(this, glsl_type::vec2_type);
+ nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
+
+ /* Set up the message header to reference the proper parts of the URB */
+ dst_reg temp(this, glsl_type::vec4_type);
+ vec4_instruction *read =
+ emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
+ read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+ /* Quad domains leave read->offset untouched and reverse the components;
+ * every other domain reads offset 1 and copies the data unswizzled.
+ */
+ if (ds_prog_data->domain == brw_ds_prog_data::quad) {
+ emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
+ } else {
+ read->offset = 1;
+ emit(MOV(dst, src_reg(temp)));
+ }
+ break;
+ }
+ default:
+ vec4_visitor::nir_setup_system_value_intrinsic(instr);
+ }
+}
+
+
+/**
+ * Lay out the thread payload registers.
+ *
+ * r0 and r1 always hold payload data, including the URB handles that are
+ * passed on to the URB write at the end of the thread, so uniforms are
+ * assigned starting at register 2.  The register number returned by
+ * setup_uniforms() becomes the first non-payload GRF.
+ */
+void
+vec4_ds_visitor::setup_payload()
+{
+   const int first_uniform_reg = 2;
+   const int after_uniforms = setup_uniforms(first_uniform_reg);
+
+   this->first_non_payload_grf = after_uniforms;
+}
+
+
+/**
+ * Emit the shader prolog: allocate a uvec4 temporary for input_read_header
+ * and fill it with DS_OPCODE_CREATE_INPUT_READ_HEADER.  The header is the
+ * source of the URB reads emitted by this visitor.
+ */
+void
+vec4_ds_visitor::emit_prolog()
+{
+ input_read_header = src_reg(this, glsl_type::uvec4_type);
+ emit(DS_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
+
+ /* Clear the annotation so it does not carry over to later instructions. */
+ this->current_annotation = NULL;
+}
+
+
+/**
+ * URB write header hook.  Intentionally emits nothing for the DS stage;
+ * \p mrf is unused.
+ */
+void
+vec4_ds_visitor::emit_urb_write_header(int mrf)
+{
+ /* No need to do anything for DS; an implied write to this MRF will be
+ * performed by VS_OPCODE_URB_WRITE.
+ */
+ (void) mrf;
+}
+
+
+/**
+ * Emit the VS_OPCODE_URB_WRITE instruction used for DS output.
+ *
+ * \param complete  true for the final write, which is flagged
+ *                  BRW_URB_WRITE_EOT_COMPLETE; otherwise no flags are set.
+ * \return the emitted instruction.
+ */
+vec4_instruction *
+vec4_ds_visitor::emit_urb_write_opcode(bool complete)
+{
+   /* For DS the completing URB write ends the thread, so the shader-time
+    * epilogue is emitted ahead of it.
+    */
+   if (complete && (INTEL_DEBUG & DEBUG_SHADER_TIME))
+      emit_shader_time_end();
+
+   vec4_instruction *write = emit(VS_OPCODE_URB_WRITE);
+   if (complete)
+      write->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
+   else
+      write->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
+
+   return write;
+}
+
+/**
+ * Emit code for the intrinsics that need DS-specific handling:
+ * gl_TessCoord, the primitive ID, and all input loads (which are pulled
+ * from the URB with read messages).  Anything else is forwarded to
+ * vec4_visitor::nir_emit_intrinsic().
+ */
+void
+vec4_ds_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_tess_coord:
+ /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
+ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+ src_reg(brw_vec8_grf(1, 0))));
+ break;
+ case nir_intrinsic_load_primitive_id:
+ emit(DS_OPCODE_GET_PRIMITIVE_ID,
+ get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+ break;
+
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_indirect:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_input_indirect: {
+ src_reg indirect_offset;
+ unsigned imm_offset;
+
+ /* Split the access into an immediate offset and an optional indirect
+ * offset register.
+ */
+ get_patch_urb_offsets(instr, input_vue_map->num_per_vertex_slots,
+ &imm_offset, &indirect_offset);
+
+ src_reg header = input_read_header;
+
+ /* With an indirect offset, build a private copy of the header that has
+ * the offset folded in; otherwise the shared header is used as is.
+ */
+ if (indirect_offset.file != BAD_FILE) {
+ header = src_reg(this, glsl_type::uvec4_type);
+ emit(DS_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
+ input_read_header, indirect_offset);
+ }
+
+ dst_reg temp(this, glsl_type::ivec4_type);
+ vec4_instruction *read =
+ emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+ read->offset = imm_offset;
+ read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+ /* Copy to target. We might end up with some funky writemasks landing
+ * in here, but we really don't want them in the above pseudo-ops.
+ */
+ dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+ dst.writemask = brw_writemask_for_size(instr->num_components);
+ emit(MOV(dst, src_reg(temp)));
+ break;
+ }
+ default:
+ vec4_visitor::nir_emit_intrinsic(instr);
+ }
+}
+
+
+/**
+ * Terminate the DS thread.  The only work done here is a call to
+ * emit_vertex().
+ */
+void
+vec4_ds_visitor::emit_thread_end()
+{
+ /* For DS, we always end the thread by emitting a single vertex.
+ * emit_urb_write_opcode() will take care of setting the eot flag on the
+ * SEND instruction.
+ */
+ emit_vertex();
+}
+
+
+/**
+ * Compile a tessellation evaluation (DS) shader to native code.
+ *
+ * Computes the output VUE map and URB entry size in \p prog_data, builds
+ * the tessellation input VUE map from the NIR shader info, runs the vec4 DS
+ * visitor and hands the resulting CFG to brw_vec4_generate_assembly().
+ *
+ * \param final_assembly_size  passed through to the assembly generator.
+ * \param error_str            if non-NULL, receives a ralloc'ed (on
+ *                             \p mem_ctx) message when compilation fails.
+ * \return the value returned by brw_vec4_generate_assembly(), or NULL when
+ *         the outputs exceed GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES or the
+ *         visitor fails.
+ */
+extern "C" const unsigned *
+brw_compile_ds(const struct brw_compiler *compiler,
+               void *log_data,
+               void *mem_ctx,
+               const struct brw_ds_prog_key *key,
+               struct brw_ds_prog_data *prog_data,
+               const nir_shader *nir,
+               struct gl_shader_program *shader_prog,
+               int shader_time_index,
+               unsigned *final_assembly_size,
+               char **error_str)
+{
+   const struct brw_device_info *devinfo = compiler->devinfo;
+   struct gl_shader *shader =
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+
+   brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
+                       nir->info.outputs_written,
+                       shader_prog->SeparateShader);
+
+   /* Each VUE slot is one vec4 of 32-bit components: 4 * 4 bytes. */
+   unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
+
+   assert(output_size_bytes >= 1);
+   if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
+      if (error_str)
+         *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
+      return NULL;
+   }
+
+   /* URB entry sizes are stored as a multiple of 64 bytes. */
+   prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+   /* gl_PrimitiveID is masked out of the per-vertex inputs when building
+    * the input VUE map.
+    */
+   struct brw_vue_map input_vue_map;
+   brw_compute_tess_vue_map(&input_vue_map,
+                            nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+                            nir->info.patch_inputs_read);
+
+   /* We always have our DS pull from the patch URB entry via messages. */
+   prog_data->base.urb_read_length = 0;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_DS)) {
+      brw_dump_ir("tessellation evaluation", shader_prog, shader, NULL);
+
+      fprintf(stderr, "TES Input ");
+      brw_print_vue_map(stderr, &input_vue_map);
+      fprintf(stderr, "TES Output ");
+      brw_print_vue_map(stderr, &prog_data->base.vue_map);
+   }
+
+   vec4_ds_visitor v(compiler, log_data, key, prog_data, &input_vue_map,
+                     nir, mem_ctx, shader_time_index);
+   if (!v.run()) {
+      if (error_str)
+         *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
+      return NULL;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_DS))
+      v.dump_instructions();
+
+   return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
+                                     &prog_data->base, v.cfg,
+                                     final_assembly_size);
+}
+
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.h
new file mode 100644
index 0000000000..5cdeedc9a2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_ds_visitor.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_ds_visitor.h
+ *
+ * Tessellation-evaluation-shader-specific code derived from the vec4_visitor class.
+ */
+
+#ifndef BRW_VEC4_DS_VISITOR_H
+#define BRW_VEC4_DS_VISITOR_H
+
+#include "brw_vec4.h"
+
+/**
+ * Scratch data used when compiling a GLSL tessellation evaluation shader.
+ */
+struct brw_ds_compile
+{
+ struct brw_ds_prog_key key;
+ struct brw_vue_map input_vue_map;
+};
+
+#ifdef __cplusplus
+namespace brw {
+
+/**
+ * vec4 backend visitor for the tessellation evaluation (DS) stage.
+ *
+ * Specializes vec4_visitor by overriding system-value setup, intrinsic
+ * emission, payload setup, prolog/thread-end emission and the URB write
+ * hooks.
+ */
+class vec4_ds_visitor : public vec4_visitor
+{
+public:
+ vec4_ds_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ const struct brw_ds_prog_key *key,
+ struct brw_ds_prog_data *prog_data,
+ struct brw_vue_map *input_vue_map,
+ const nir_shader *nir,
+ void *mem_ctx,
+ int shader_time_index);
+
+protected:
+ /* System value and intrinsic handling. */
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
+ virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+ virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+
+ /* Payload layout and thread start/end. */
+ virtual void setup_payload();
+ virtual void emit_prolog();
+ virtual void emit_thread_end();
+
+ /* URB write hooks. */
+ virtual void emit_urb_write_header(int mrf);
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete);
+
+private:
+ /* URB read message header used as the source of input reads. */
+ src_reg input_read_header;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* BRW_VEC4_DS_VISITOR_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 8bc21df5ff..923fb5a9e8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -319,6 +319,7 @@ generate_tex(struct brw_codegen *p,
static void
generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
+ /* XXX: THIS IS CRAZY. We need to initialize m0.3 - the slot offsets */
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
inst->base_mrf, /* starting mrf reg nr */
@@ -708,6 +709,253 @@ generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
}
+/**
+ * Extract the HS instance number from the thread payload into element 0 of
+ * \p dst: r0.2 is shifted right by HSW_HS_PAYLOAD_INSTANCE_NUMBER_SHIFT and
+ * then masked to HSW_HS_PAYLOAD_INSTANCE_NUMBER_WIDTH bits.  Both
+ * instructions are emitted in Align1 mode.
+ */
static void
+generate_hs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
+{
+ /* "Instance Count" comes as part of the payload in r0.2 bits 23:17. */
+ dst = retype(dst, BRW_REGISTER_TYPE_UD);
+ struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ brw_SHR(p, get_element_ud(dst, 0),
+ get_element_ud(r0, 2),
+ brw_imm_ud(HSW_HS_PAYLOAD_INSTANCE_NUMBER_SHIFT));
+ brw_AND(p, get_element(dst, 0),
+ get_element_ud(dst, 0),
+ brw_imm_ud((1 << HSW_HS_PAYLOAD_INSTANCE_NUMBER_WIDTH) - 1));
+
+ brw_pop_insn_state(p);
+
+ /* XXX: The IVB PRM suggests that Instance ID might be 22:16 there, but
+ * all later docs imply that it's 23:17 even on IVB. Need to test.
+ */
+}
+
+/**
+ * Emit a SEND to the URB shared function performing an OWORD write.
+ *
+ * \p urb_header is the first register of the message payload; the message
+ * length is inst->mlen and the global offset is inst->offset.  The message
+ * has a null destination (rlen 0), carries a header, is not EOT, and
+ * always has per-slot offsets enabled.
+ */
+static void
+generate_hs_urb_write(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg urb_header)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, brw_null_reg());
+ brw_set_src0(p, send, urb_header);
+
+ brw_set_message_descriptor(p, send, BRW_SFID_URB,
+ inst->mlen /* mlen */, 0 /* rlen */,
+ true /* header */, false /* eot */);
+ brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
+ brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
+ brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+
+ /* what happens to swizzles? */
+}
+
+
+/**
+ * Build a URB message header in \p dst from a vertex index and an offset.
+ *
+ * \param dst     GRF that receives the header; it is zeroed first.
+ * \param vertex  vertex index, either an immediate or a GRF of type D/UD.
+ * \param offset  optional offset copied into dst.3; skipped when its file
+ *                is BAD_FILE.
+ *
+ * All instructions are emitted in Align1 mode with masking disabled.
+ */
+static void
+generate_hs_input_urb_offsets(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg vertex,
+ struct brw_reg offset)
+{
+ /* Generates an URB read/write message header for HS/DS operation.
+ * Inputs are a vertex index, and a byte offset from the beginning of
+ * the vertex. */
+ /* NOTE(review): the comment above calls the offset a byte offset, while the
+ * comment at the dst.3 write below calls it 128-bit granular — confirm
+ * which unit callers actually pass.
+ */
+
+ /* If `vertex` is not an immediate, we clobber a0.0 */
+
+ assert(vertex.file == BRW_IMMEDIATE_VALUE || vertex.file == BRW_GENERAL_REGISTER_FILE);
+ assert(vertex.type == BRW_REGISTER_TYPE_UD || vertex.type == BRW_REGISTER_TYPE_D);
+
+ assert(dst.file == BRW_GENERAL_REGISTER_FILE);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, brw_imm_ud(0));
+
+ /* m0.5 bits 8-15 are channel enables */
+ /* The value 0x0f00 written here sets only bits 11:8. */
+ brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0x0f00));
+
+ /* m0.0-0.1: URB handles */
+ /* Only dst.0 is written by either branch below; dst.1 stays zero. */
+ if (vertex.file == BRW_IMMEDIATE_VALUE) {
+ /* The handle for vertex N is read from dword (N & 7) of register
+ * g(1 + N / 8).
+ */
+ uint32_t vertex_index = vertex.dw1.ud;
+ struct brw_reg index_reg = brw_vec1_grf(
+ 1 + (vertex_index >> 3), vertex_index & 7);
+
+ brw_MOV(p, get_element_ud(dst, 0), retype(index_reg, BRW_REGISTER_TYPE_UD));
+ } else {
+ /* indirect via a0.0 */
+ struct brw_reg addr = brw_address_reg(0);
+
+ /* bottom half: m0.0 = g[1.0 + vertex.0]UD */
+ /* a0.0 = (vertex + 8) * 4.  NOTE(review): presumably the byte address of
+ * dword `vertex` counted from g1.0 — confirm against the register file
+ * layout.
+ */
+ brw_ADD(p, addr, get_element_ud(vertex, 0), brw_imm_uw(0x8));
+ brw_SHL(p, addr, addr, brw_imm_ud(2));
+ brw_MOV(p, get_element_ud(dst, 0), deref_1ud(brw_indirect(0, 0), 0));
+ }
+
+ /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
+ if (offset.file != BAD_FILE)
+ brw_MOV(p, get_element_ud(dst, 3), get_element_ud(offset, 0));
+
+ brw_pop_insn_state(p);
+}
+
+
+/**
+ * Build a URB message header in \p dst that addresses the patch URB entry.
+ *
+ * \param dst         GRF or MRF that receives the header; zeroed first.
+ * \param write_mask  immediate UD channel mask, shifted left by 8 and
+ *                    stored in dst.5.
+ * \param offset      optional offset copied into dst.3; skipped when its
+ *                    file is BAD_FILE.
+ *
+ * All instructions are emitted in Align1 mode with masking disabled.
+ */
+static void
+generate_hs_output_urb_offsets(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg write_mask,
+ struct brw_reg offset)
+{
+ /* Generates an URB read/write message header for HS/DS operation, for the patch URB entry. */
+ assert(dst.file == BRW_GENERAL_REGISTER_FILE || dst.file == BRW_MESSAGE_REGISTER_FILE);
+
+ assert(write_mask.file == BRW_IMMEDIATE_VALUE);
+ assert(write_mask.type == BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, brw_imm_ud(0));
+
+ unsigned mask = write_mask.dw1.ud;
+
+ /* m0.5 bits 11:8 are channel enables */
+ brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(mask << 8));
+
+ /* HS patch URB handle is delivered in r0.0 */
+ struct brw_reg urb_handle = brw_vec1_grf(0, 0);
+
+ /* m0.0-0.1: URB handles */
+ /* Only dst.0 is written here; dst.1 stays zero. */
+ brw_MOV(p, get_element_ud(dst, 0), retype(urb_handle, BRW_REGISTER_TYPE_UD));
+
+ /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
+ if (offset.file != BAD_FILE)
+ brw_MOV(p, get_element_ud(dst, 3), stride(offset, 0, 1, 0));
+
+ brw_pop_insn_state(p);
+}
+
+/**
+ * Build the base URB read header for DS inputs in \p dst.
+ *
+ * The register is zeroed, dst.5 is set to 0xff00, and g1.3 masked with
+ * 0x1fff is written to the two dwords starting at dst.0.  All instructions
+ * are emitted in Align1 mode with masking disabled.
+ */
+static void
+generate_ds_create_input_read_header(struct brw_codegen *p,
+ struct brw_reg dst)
+{
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Initialize the register to 0 */
+ brw_MOV(p, dst, brw_imm_ud(0));
+
+ /* Enable all the channels in m0.5 bits 15:8 */
+ brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
+
+ /* Copy g1.3 (the patch URB handle) to m0.0 and m0.1. For safety,
+ * mask out irrelevant "Reserved" bits, as they're not marked MBZ.
+ */
+ brw_AND(p, vec2(get_element_ud(dst, 0)),
+ retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x1fff));
+ brw_pop_insn_state(p);
+}
+
+/**
+ * Produce a copy of a URB read header with an indirect offset applied.
+ *
+ * \p header is copied to \p dst, then the two dwords starting at dst.3 are
+ * overwritten from \p offset.  All instructions are emitted in Align1 mode
+ * with masking disabled.
+ */
+static void
+generate_ds_add_indirect_urb_offset(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg header,
+ struct brw_reg offset)
+{
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, dst, header);
+ /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
+ brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+
+ brw_pop_insn_state(p);
+}
+
+/**
+ * Emit a SEND to the URB shared function performing an OWORD read.
+ *
+ * \param inst    supplies the global offset (inst->offset) and the
+ *                urb_write_flags consulted below.
+ * \param dst     destination of the read (response length 1).
+ * \param header  message header; must be a UD-typed GRF (message length 1).
+ */
+static void
+generate_vec4_urb_read(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg header)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+
+ assert(header.file == BRW_GENERAL_REGISTER_FILE);
+ assert(header.type == BRW_REGISTER_TYPE_UD);
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, header);
+
+ brw_set_message_descriptor(p, send, BRW_SFID_URB,
+ 1 /* mlen */, 1 /* rlen */,
+ true /* header */, false /* eot */);
+ brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
+
+ /* NOTE(review): the PER_SLOT_OFFSET flag gates the interleave swizzle
+ * setting, while the per-slot-offset bit itself is set unconditionally
+ * just below — confirm this pairing is intended.
+ */
+ if (inst->urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
+ brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+ }
+ brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+
+ brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
+}
+
+/**
+ * Copy the DS primitive ID into \p dst.  The value is read from g1.7 as a
+ * signed dword with an Align1 MOV; the instruction state is saved and
+ * restored around it.
+ */
+static void
+generate_ds_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
+{
+   const struct brw_reg primitive_id =
+      retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D);
+
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, dst, primitive_id);
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Copy the HS primitive ID into \p dst.  The value is read from r0.1 as an
+ * unsigned dword with an Align1 MOV; the instruction state is saved and
+ * restored around it.
+ */
+static void
+generate_hs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
+{
+   const struct brw_reg primitive_id =
+      retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD);
+
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, dst, primitive_id);
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Build the barrier message header in \p dst.
+ *
+ * The register is zeroed, then dst.2 is assembled from bits 16:13 of r0.2
+ * shifted left by 11, OR'ed with the HS instance count (taken from
+ * \p prog_data cast to brw_hs_prog_data) shifted left by 9 and with bit 15.
+ * All instructions are emitted in Align1 mode with masking disabled.
+ */
+static void
+generate_hs_create_barrier_header(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ struct brw_reg dst)
+{
+ struct brw_reg m0_2 = get_element_ud(dst, 2);
+ unsigned instances = ((struct brw_hs_prog_data *) prog_data)->instances;
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Zero the message header */
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
+
+ /* Copy "Barrier ID" from r0.2 bits 16:13 */
+ brw_AND(p, m0_2,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x1e000));
+
+ /* Shift it into place */
+ brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(11));
+
+ /* Set the Barrier Count and the enable bit */
+ brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15)));
+
+ brw_pop_insn_state(p);
+}
+
+static void
generate_oword_dual_block_offsets(struct brw_codegen *p,
struct brw_reg m1,
struct brw_reg index)
@@ -1084,7 +1332,8 @@ generate_code(struct brw_codegen *p,
* force_writemask_all in order to make sure the instruction is executed
* regardless of which channels are enabled.
*/
- assert(inst->force_writemask_all);
+ /* XXX: disabled for tess */
+ //assert(inst->force_writemask_all);
/* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
* the following register region restrictions (from Graphics BSpec:
@@ -1524,6 +1773,60 @@ generate_code(struct brw_codegen *p,
break;
}
+ case HS_OPCODE_URB_WRITE:
+ generate_hs_urb_write(p, inst, src[0]);
+ break;
+
+ case VEC4_OPCODE_URB_READ:
+ generate_vec4_urb_read(p, inst, dst, src[0]);
+ break;
+
+ case HS_OPCODE_SET_INPUT_URB_OFFSETS:
+ generate_hs_input_urb_offsets(p, dst, src[0], src[1]);
+ break;
+
+ case HS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ generate_hs_output_urb_offsets(p, dst, src[0], src[1]);
+ break;
+
+ case DS_OPCODE_CREATE_INPUT_READ_HEADER:
+ generate_ds_create_input_read_header(p, dst);
+ break;
+
+ case DS_OPCODE_ADD_INDIRECT_URB_OFFSET:
+ generate_ds_add_indirect_urb_offset(p, dst, src[0], src[1]);
+ break;
+
+ case HS_OPCODE_GET_INSTANCE_ID:
+ generate_hs_get_instance_id(p, dst);
+ break;
+
+ case DS_OPCODE_GET_PRIMITIVE_ID:
+ generate_ds_get_primitive_id(p, dst);
+ break;
+
+ case HS_OPCODE_GET_PRIMITIVE_ID:
+ generate_hs_get_primitive_id(p, dst);
+ break;
+
+ case HS_OPCODE_CREATE_BARRIER_HEADER:
+ generate_hs_create_barrier_header(p, prog_data, dst);
+ break;
+
+ case SHADER_OPCODE_BARRIER:
+ brw_barrier(p, src[0]);
+ brw_WAIT(p);
+ break;
+
+ case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
+ /* XXX: hack, get a real opcode */
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, brw_flag_reg(0, 0), brw_imm_ud(0xf));
+ brw_pop_insn_state(p);
+ break;
+
default:
unreachable("Unsupported opcode");
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 49c1083733..235806fe12 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -43,7 +43,7 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
int shader_time_index)
: vec4_visitor(compiler, log_data, &c->key.tex,
&prog_data->base, shader, mem_ctx,
- no_spills, shader_time_index),
+ no_spills, shader_time_index, &c->input_vue_map),
c(c),
gs_prog_data(prog_data)
{
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.cpp
new file mode 100644
index 0000000000..afb3c07e19
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.cpp
@@ -0,0 +1,426 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_hs_visitor.cpp
+ *
+ * Tessellation control shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_vec4_hs_visitor.h"
+
+const unsigned MAX_HS_INPUT_VERTICES = 32;
+
+namespace brw {
+
+/**
+ * Construct a vec4 visitor for a tessellation control (hull) shader.
+ *
+ * The texture key, the VUE program data and the input VUE map handed to the
+ * vec4_visitor base class are all taken from the compile state \p c, which
+ * is also kept in the `c` member.  The remaining arguments are forwarded to
+ * the base constructor unchanged.
+ */
+vec4_hs_visitor::vec4_hs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ struct brw_hs_compile *c,
+ nir_shader *shader,
+ void *mem_ctx,
+ bool no_spills,
+ int shader_time_index)
+ : vec4_visitor(compiler, log_data, &c->key.tex, &c->prog_data.base,
+ shader, mem_ctx, no_spills, shader_time_index,
+ &c->input_vue_map),
+ c(c)
+{
+}
+
+
+/**
+ * Make sure a register exists for a system value read by \p instr.
+ *
+ * Only load_invocation_id is handled here: its register is created through
+ * make_reg_for_system_value() the first time it is seen (i.e. while the
+ * slot is still BAD_FILE).  All other intrinsics are deferred to the base
+ * class.
+ */
+void
+vec4_hs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   if (instr->intrinsic != nir_intrinsic_load_invocation_id) {
+      vec4_visitor::nir_setup_system_value_intrinsic(instr);
+      return;
+   }
+
+   dst_reg *invocation_id =
+      &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+
+   /* Already set up by an earlier instruction; nothing to do. */
+   if (invocation_id->file != BAD_FILE)
+      return;
+
+   *invocation_id =
+      *this->make_reg_for_system_value(SYSTEM_VALUE_INVOCATION_ID,
+                                       glsl_type::int_type);
+}
+
+/**
+ * Allocate a register of \p type for the system value \p location.
+ *
+ * For SYSTEM_VALUE_INVOCATION_ID an HS_OPCODE_GET_INSTANCE_ID instruction
+ * writing the new register is emitted.  SYSTEM_VALUE_PRIMITIVE_ID only gets
+ * the (uninitialized) register.  Any other location trips an assertion.
+ *
+ * \return the newly allocated register (allocated from mem_ctx).
+ */
+dst_reg *
+vec4_hs_visitor::make_reg_for_system_value(int location, const glsl_type *type)
+{
+   dst_reg *reg = new(mem_ctx) dst_reg(this, type);
+
+   if (location == SYSTEM_VALUE_INVOCATION_ID) {
+      this->current_annotation = "initialize gl_InvocationID";
+      emit(HS_OPCODE_GET_INSTANCE_ID, *reg);
+   } else if (location != SYSTEM_VALUE_PRIMITIVE_ID) {
+      assert(!"not reached");
+   }
+
+   return reg;
+}
+
+
+/**
+ * Lay out the thread payload registers and record the first GRF that is
+ * free for general allocation in first_non_payload_grf.
+ *
+ * Layout assumed here: r0 (header), four registers of input vertex URB
+ * handles (r1-r4, always reserved regardless of the actual vertex count),
+ * then the uniforms placed by setup_uniforms(), plus one extra register
+ * accounted for gl_PatchVerticesIn.
+ */
+void
+vec4_hs_visitor::setup_payload()
+{
+ int reg = 0;
+
+ /* The payload always contains important data in r0, which contains
+ * the URB handles that are passed on to the URB write at the end
+ * of the thread.
+ */
+ reg++;
+
+ /* r1.0 - r4.7 may contain the input control point URB handles,
+ * which we use to pull vertex data.
+ */
+
+ /* The exact computation below is disabled; four registers are always
+ * reserved instead.
+ */
+ //int num_vertices = ((brw_hs_prog_key *)key)->input_vertices;
+ //int vertex_handle_regs = (num_vertices + 7) / 8;
+ //reg += vertex_handle_regs;
+ reg += 4;
+
+ /* Push constants may start at r5.0 */
+
+ reg = setup_uniforms(reg);
+
+ /* But the first push constant is gl_PatchVerticesIn. */
+ /* NOTE(review): presumably setup_uniforms() has just set
+ * dispatch_grf_start_reg, and this bump skips the gl_PatchVerticesIn
+ * register -- confirm against setup_uniforms().
+ */
+ prog_data->base.dispatch_grf_start_reg++;
+ reg++;
+
+ this->first_non_payload_grf = reg;
+}
+
+
+/**
+ * Emit the shader prolog: an FS_OPCODE_MOV_DISPATCH_TO_FLAGS instruction
+ * followed by an IF predicated on the flag register.  The IF is closed by
+ * the ENDIF emitted in emit_thread_end(), so the whole shader body runs
+ * inside it.
+ */
+void
+vec4_hs_visitor::emit_prolog()
+{
+ emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ emit(IF(BRW_PREDICATE_NORMAL));
+}
+
+
+/**
+ * Emit the end-of-thread sequence: close the IF opened in emit_prolog() and
+ * send a header-only URB write flagged BRW_URB_WRITE_EOT_COMPLETE.
+ */
+void
+vec4_hs_visitor::emit_thread_end()
+{
+ current_annotation = "thread end";
+
+ /* Matches the IF emitted by emit_prolog(). */
+ emit(BRW_OPCODE_ENDIF);
+
+ /* XXX: need 0xf channel enables... */
+
+ vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ inst->mlen = 1; /* just the header, no data. */
+ inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
+}
+
+
+/**
+ * GLSL IR barrier visitor.  Currently a no-op stub; barriers arriving as the
+ * NIR barrier intrinsic are handled in nir_emit_intrinsic() instead.
+ */
+void
+vec4_hs_visitor::visit(ir_barrier *ir)
+{
+ /* XXX: Emit code to send BarrierMsg to the Message Gateway shared function */
+}
+
+/**
+ * Emit a URB read of one input slot into \p dst.
+ *
+ * \param dst              destination; its writemask is applied by the final
+ *                         MOV, not by the read itself.
+ * \param vertex_index     which input vertex to read; passed to
+ *                         HS_OPCODE_SET_INPUT_URB_OFFSETS.
+ * \param base_offset      constant offset stored in the read's `offset`.
+ * \param indirect_offset  additional offset register, or a BAD_FILE register
+ *                         when there is none.
+ */
+void
+vec4_hs_visitor::emit_input_urb_read(const dst_reg &dst,
+ const src_reg &vertex_index,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ dst_reg temp(this, glsl_type::ivec4_type);
+ temp.type = dst.type;
+
+ /* Set up the message header to reference the proper parts of the URB */
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ emit(HS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
+ indirect_offset);
+
+ /* Read into a temporary, ignoring writemasking. */
+ vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+ read->offset = base_offset;
+ read->mlen = 1;
+ read->base_mrf = -1;
+
+ /* Copy the temporary to the destination to deal with writemasking.
+ *
+ * Also attempt to deal with gl_PointSize being in the .w component:
+ * a direct read of offset 0 replicates .w into every enabled channel.
+ */
+ if (read->offset == 0 && indirect_offset.file == BAD_FILE) {
+ emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
+ } else {
+ emit(MOV(dst, src_reg(temp)));
+ }
+}
+
+/**
+ * Emit a URB read of one output slot directly into \p dst.
+ *
+ * \param dst              destination; its writemask is also passed (as an
+ *                         immediate) to HS_OPCODE_SET_OUTPUT_URB_OFFSETS.
+ * \param base_offset      constant offset stored in the read's `offset`.
+ * \param indirect_offset  additional offset register, or a BAD_FILE register
+ *                         when there is none.
+ */
+void
+vec4_hs_visitor::emit_output_urb_read(const dst_reg &dst,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ /* Set up the message header to reference the proper parts of the URB */
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ emit(HS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
+ src_reg(dst.writemask), indirect_offset);
+
+ /* Read straight into dst; unlike emit_input_urb_read(), no temporary is
+ * used here.
+ */
+ vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
+ read->offset = base_offset;
+ read->mlen = 1;
+ read->base_mrf = -1;
+}
+
+/**
+ * Emit a URB write of \p value to one output slot.
+ *
+ * \param value            data to write (any swizzle is applied by the
+ *                         caller).
+ * \param writemask        channel mask, passed as an immediate to
+ *                         HS_OPCODE_SET_OUTPUT_URB_OFFSETS.
+ * \param base_offset      constant offset stored in the write's `offset`.
+ * \param indirect_offset  additional offset register, or a BAD_FILE register
+ *                         when there is none.
+ */
+void
+vec4_hs_visitor::emit_urb_write(const src_reg &value,
+ unsigned writemask,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ /* Message payload: the header, then the data at register offset 1. */
+ src_reg message(this, glsl_type::uvec4_type, 2);
+ vec4_instruction *inst;
+
+ emit(HS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
+ src_reg(writemask), indirect_offset);
+ inst = emit(MOV(offset(dst_reg(retype(message, value.type)), 1), value));
+ inst->force_writemask_all = true;
+
+ inst = emit(HS_OPCODE_URB_WRITE, dst_null_f(), message);
+ inst->offset = base_offset;
+ inst->mlen = 2;
+ inst->base_mrf = -1;
+}
+
+/**
+ * Translate tessellation-control-specific NIR intrinsics into vec4 IR.
+ *
+ * Handled here:
+ *  - load_primitive_id and load_patch_vertices_in;
+ *  - per-vertex input loads, which are pulled with URB reads;
+ *  - output loads and stores (per-patch and per-vertex), which become URB
+ *    reads and writes;
+ *  - barrier.
+ * Everything else is forwarded to vec4_visitor::nir_emit_intrinsic().
+ *
+ * Output accesses at constant URB offset 0 or 1 with no indirect offset are
+ * treated as gl_TessLevelInner[] / gl_TessLevelOuter[] and are remapped to
+ * the Patch URB header layout for the domain in c->key.ds_primitive_mode.
+ */
+void
+vec4_hs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_primitive_id:
+      emit(HS_OPCODE_GET_PRIMITIVE_ID,
+           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+      break;
+   case nir_intrinsic_load_patch_vertices_in:
+      /* gl_PatchVerticesIn is read from r5.0 (see setup_payload()). */
+      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
+               src_reg(retype(brw_vec1_grf(5, 0), BRW_REGISTER_TYPE_D))));
+      break;
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_input_indirect: {
+      src_reg indirect_offset = get_nir_indirect_src(instr);
+      src_reg vertex_index = get_nir_vertex_index_src(instr);
+      unsigned imm_offset = instr->const_index[0];
+
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+
+      emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
+      break;
+   }
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_input_indirect:
+      unreachable("nir_lower_io should never give us these.");
+      break;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_output_indirect:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_vertex_output_indirect: {
+      src_reg indirect_offset;
+      unsigned imm_offset;
+
+      get_patch_urb_offsets(instr, prog_data->vue_map.num_per_vertex_slots,
+                            &imm_offset, &indirect_offset);
+
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+
+      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
+         dst.type = BRW_REGISTER_TYPE_F;
+
+         /* This is a read of gl_TessLevelInner[], which lives in the
+          * Patch URB header.  The layout depends on the domain.
+          */
+         switch (c->key.ds_primitive_mode) {
+         case GL_QUADS: {
+            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
+            dst_reg tmp(this, glsl_type::vec4_type);
+            emit_output_urb_read(tmp, 0, src_reg());
+            emit(MOV(writemask(dst, WRITEMASK_XY),
+                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+            break;
+         }
+         case GL_TRIANGLES:
+            /* DWord 4; use offset 1 but normal swizzle/writemask. */
+            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
+            break;
+         case GL_ISOLINES:
+            /* All channels are undefined. */
+            return;
+         default:
+            unreachable("Bogus tessellation domain");
+         }
+      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
+         dst.type = BRW_REGISTER_TYPE_F;
+
+         /* This is a read of gl_TessLevelOuter[], which lives in the
+          * high 4 DWords of the Patch URB header, in reverse order.
+          */
+         switch (c->key.ds_primitive_mode) {
+         case GL_QUADS:
+            dst.writemask = WRITEMASK_XYZW;
+            break;
+         case GL_TRIANGLES:
+            dst.writemask = WRITEMASK_XYZ;
+            break;
+         case GL_ISOLINES:
+            /* Two outer levels are stored for isolines (see the matching
+             * store case below).  This must break, not return, so that the
+             * read and swizzled MOV below are actually emitted.
+             */
+            dst.writemask = WRITEMASK_XY;
+            break;
+         default:
+            unreachable("Bogus tessellation domain");
+         }
+
+         dst_reg tmp(this, glsl_type::vec4_type);
+         emit_output_urb_read(tmp, 1, src_reg());
+         emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+      } else {
+         emit_output_urb_read(dst, imm_offset, indirect_offset);
+      }
+      break;
+   }
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_output_indirect:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_vertex_output_indirect: {
+      src_reg value = get_nir_src(instr->src[0]);
+      src_reg indirect_offset;
+      unsigned imm_offset;
+      unsigned mask = brw_writemask_for_size(instr->num_components);
+      unsigned swiz = BRW_SWIZZLE_XYZW;
+
+      get_patch_urb_offsets(instr, prog_data->vue_map.num_per_vertex_slots,
+                            &imm_offset, &indirect_offset);
+
+      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
+         value.type = BRW_REGISTER_TYPE_F;
+
+         /* This is a write to gl_TessLevelInner[], which lives in the
+          * Patch URB header.  The layout depends on the domain.
+          */
+         switch (c->key.ds_primitive_mode) {
+         case GL_QUADS:
+            /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
+             * We use an XXYX swizzle to put .xy into the .wz channels
+             * (i.e. reversed), and use a .zw writemask.
+             */
+            swiz = BRW_SWIZZLE4(0, 0, 1, 0);
+            mask = WRITEMASK_ZW;
+            break;
+         case GL_TRIANGLES:
+            /* gl_TessLevelInner[].x lives at DWord 4, so we set the
+             * writemask to X and bump the URB offset by 1.
+             */
+            imm_offset = 1;
+            mask = WRITEMASK_X;
+            break;
+         case GL_ISOLINES:
+            /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
+            return;
+         default:
+            unreachable("Bogus tessellation domain");
+         }
+      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
+         value.type = BRW_REGISTER_TYPE_F;
+
+         /* This is a write to gl_TessLevelOuter[] which lives in the
+          * Patch URB Header at DWords 4-7.  However, it's reversed, so
+          * instead of .xyzw we have .wzyx.
+          */
+         swiz = BRW_SWIZZLE_WZYX;
+
+         switch (c->key.ds_primitive_mode) {
+         case GL_QUADS:
+            mask = WRITEMASK_XYZW;
+            break;
+         case GL_TRIANGLES:
+            mask = WRITEMASK_YZW;
+            break;
+         case GL_ISOLINES:
+            mask = WRITEMASK_ZW;
+            break;
+         default:
+            unreachable("Bogus tessellation domain");
+         }
+      }
+
+      emit_urb_write(swizzle(value, swiz), mask,
+                     imm_offset, indirect_offset);
+      break;
+   }
+
+   case nir_intrinsic_barrier: {
+      /* Build the barrier message header, then send it. */
+      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+      emit(HS_OPCODE_CREATE_BARRIER_HEADER, header);
+      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
+      break;
+   }
+
+   default:
+      vec4_visitor::nir_emit_intrinsic(instr);
+   }
+}
+
+
+/**
+ * Compile the linked tessellation control shader of \p shader_prog.
+ *
+ * Runs a vec4_hs_visitor over the shader's NIR and, on success, hands the
+ * resulting CFG to brw_vec4_generate_assembly().
+ *
+ * \param brw                  context; also passed to the visitor and the
+ *                             generator as their log_data argument.
+ * \param c                    compile state (key, prog_data, input VUE map).
+ * \param final_assembly_size  passed through to the generator.
+ * \return the value returned by brw_vec4_generate_assembly(), or NULL if the
+ *         visitor failed.  On failure LinkStatus is cleared and the
+ *         visitor's fail_msg is appended to the program's InfoLog.
+ */
+extern "C" const unsigned *
+brw_hs_emit(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct brw_hs_compile *c,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size)
+{
+ struct gl_shader *shader =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+
+ vec4_hs_visitor v(compiler, brw, c, shader->Program->nir, mem_ctx,
+ false /* no_spills */, shader_time_index);
+ if (!v.run()) {
+ shader_prog->LinkStatus = false;
+ ralloc_strcat(&shader_prog->InfoLog, v.fail_msg);
+ return NULL;
+ }
+
+ /* Dump the vec4 IR when HS debugging is enabled. */
+ if (unlikely(INTEL_DEBUG & DEBUG_HS))
+ v.dump_instructions();
+
+ return brw_vec4_generate_assembly(compiler, brw, mem_ctx,
+ shader->Program->nir,
+ &c->prog_data.base, v.cfg,
+ final_assembly_size);
+}
+
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.h
new file mode 100644
index 0000000000..0ca6ef6b70
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_hs_visitor.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_hs_visitor.h
+ *
+ * Tessellation-control-shader-specific code derived from the vec4_visitor class.
+ */
+
+#ifndef BRW_VEC4_HS_VISITOR_H
+#define BRW_VEC4_HS_VISITOR_H
+
+#include "brw_vec4.h"
+
+
+/**
+ * Scratch data used when compiling a GLSL tessellation control shader
+ */
+struct brw_hs_compile
+{
+ /* Compile key; the visitor reads key.tex and key.ds_primitive_mode. */
+ struct brw_hs_prog_key key;
+ /* Program data for this compile; prog_data.base is handed to the
+ * vec4_visitor base class and to the assembly generator.
+ */
+ struct brw_hs_prog_data prog_data;
+ /* Passed to the vec4_visitor base constructor as its input VUE map. */
+ struct brw_vue_map input_vue_map;
+
+ /* NOTE(review): not referenced by the visitor code; presumably the
+ * driver-side program object being compiled -- confirm in brw_hs.c.
+ */
+ struct brw_tess_ctrl_program *hp;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const unsigned *brw_hs_emit(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_hs_compile *c,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#ifdef __cplusplus
+namespace brw {
+
+/**
+ * vec4 backend visitor for tessellation control (hull) shaders.
+ *
+ * Overrides the payload setup, prolog/epilog and intrinsic handling of
+ * vec4_visitor, and adds helpers that read inputs/outputs from, and write
+ * outputs to, the URB.
+ */
+class vec4_hs_visitor : public vec4_visitor
+{
+public:
+ vec4_hs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ struct brw_hs_compile *c,
+ nir_shader *shader,
+ void *mem_ctx,
+ bool no_spills,
+ int shader_time_index);
+
+protected:
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
+ virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+ virtual void setup_payload();
+ virtual void emit_prolog();
+ virtual void emit_thread_end();
+
+ virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+
+ virtual void visit(ir_barrier *);
+
+ /* Read one input slot of the given vertex from the URB into dst. */
+ void emit_input_urb_read(const dst_reg &dst,
+ const src_reg &vertex_index,
+ unsigned base_offset,
+ const src_reg &indirect_offset);
+ /* Read one output slot back from the URB into dst. */
+ void emit_output_urb_read(const dst_reg &dst,
+ unsigned base_offset,
+ const src_reg &indirect_offset);
+
+ /* Write value to one output slot in the URB, limited by writemask. */
+ void emit_urb_write(const src_reg &value, unsigned writemask,
+ unsigned base_offset, const src_reg &indirect_offset);
+
+ /* We do not use the normal end-of-shader URB write mechanism, but every
+ * vec4 stage must provide implementations of these, so they are stubs:
+ */
+ virtual void emit_urb_write_header(int mrf) {}
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete) { return NULL; }
+
+private:
+ /* Compile state this visitor was constructed with. */
+ const struct brw_hs_compile * const c;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* BRW_VEC4_HS_VISITOR_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 483e14d02c..bbe1fc3377 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -117,6 +117,10 @@ vec4_visitor::nir_setup_system_values()
void
vec4_visitor::nir_setup_inputs()
{
+ /* We pull tessellation shader inputs; we don't want registers for them. */
+ if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL)
+ return;
+
nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
nir_foreach_variable(var, &nir->inputs) {
@@ -339,6 +343,13 @@ vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
return get_nir_src(src, nir_type_int, num_components);
}
+/**
+ * Fetch a single-component NIR source, preferring an immediate.
+ *
+ * If \p src is a compile-time constant, its first component is returned as
+ * an immediate retyped to \p type; otherwise this falls back to
+ * get_nir_src() with one component.
+ */
+src_reg
+vec4_visitor::get_nir_src_imm1(nir_src src, enum brw_reg_type type)
+{
+   nir_const_value *constant = nir_src_as_const_value(src);
+
+   if (constant)
+      return retype(src_reg(constant->u[0]), type);
+
+   return get_nir_src(src, type, 1);
+}
+
void
vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
@@ -413,16 +424,48 @@ vec4_visitor::get_nir_vertex_index_src(nir_intrinsic_instr *instr)
case nir_intrinsic_load_per_vertex_output:
case nir_intrinsic_load_per_vertex_input_indirect:
case nir_intrinsic_load_per_vertex_output_indirect:
- return get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
+ return get_nir_src_imm1(instr->src[0], BRW_REGISTER_TYPE_D);
case nir_intrinsic_store_per_vertex_output:
case nir_intrinsic_store_per_vertex_output_indirect:
- return get_nir_src(instr->src[1], BRW_REGISTER_TYPE_D, 1);
+ return get_nir_src_imm1(instr->src[1], BRW_REGISTER_TYPE_D);
default:
/* Return a BAD_FILE register. */
return src_reg();
}
}
+/**
+ * Compute the URB offset of an output access, folding the vertex index into
+ * the immediate or the indirect offset.
+ *
+ * \param instr                 the load/store intrinsic being lowered.
+ * \param num_per_vertex_slots  stride, in slots, between consecutive
+ *                              vertices.
+ * \param out_imm_offset        receives the constant part of the offset.
+ * \param out_indirect_offset   receives the register part of the offset, or
+ *                              a BAD_FILE register if there is none.
+ */
+void
+vec4_visitor::get_patch_urb_offsets(nir_intrinsic_instr *instr,
+ unsigned num_per_vertex_slots,
+ unsigned *out_imm_offset,
+ src_reg *out_indirect_offset)
+{
+ src_reg indirect_offset = get_nir_indirect_src(instr);
+ src_reg vertex_index = get_nir_vertex_index_src(instr);
+ unsigned imm_offset = instr->const_index[0];
+
+ if (vertex_index.file == IMM) {
+ /* Constant vertex index: fold it straight into the immediate offset. */
+ imm_offset += vertex_index.fixed_hw_reg.dw1.ud * num_per_vertex_slots;
+ } else if (vertex_index.file != BAD_FILE) {
+ /* Multiply by the number of per-vertex slots. */
+ src_reg vert_offset(this, glsl_type::uint_type);
+ emit(MUL(dst_reg(vert_offset), vertex_index,
+ src_reg(num_per_vertex_slots)));
+
+ /* ADD to the existing indirect offset, or make this the offset. */
+ if (indirect_offset.file != BAD_FILE) {
+ src_reg sum(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(sum), indirect_offset, vert_offset));
+ indirect_offset = sum;
+ } else {
+ indirect_offset = vert_offset;
+ }
+ }
+ /* A BAD_FILE vertex index (not a per-vertex access) changes nothing. */
+
+ *out_imm_offset = imm_offset;
+ *out_indirect_offset = indirect_offset;
+}
+
void
vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
@@ -729,7 +772,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_instance_id: {
+ case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_invocation_id:
+ case nir_intrinsic_load_tess_level_inner:
+ case nir_intrinsic_load_tess_level_outer: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
src_reg val = src_reg(nir_system_values[sv]);
assert(val.file != BAD_FILE);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 606fbd0627..695691f87a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -26,6 +26,10 @@
#include "glsl/ir_uniform.h"
#include "program/sampler.h"
+/* XXX hack alert! */
+#include "brw_vec4_hs_visitor.h"
+#include "brw_vec4_ds_visitor.h"
+
namespace brw {
vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@ -1843,13 +1847,15 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
- int shader_time_index)
+ int shader_time_index,
+ const struct brw_vue_map *const input_vue_map)
: backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
key_tex(key_tex),
prog_data(prog_data),
fail_msg(NULL),
first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
+ input_vue_map(input_vue_map),
no_spills(no_spills),
shader_time_index(shader_time_index),
last_scratch(0)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 5dd4f98cec..b9ed035517 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -307,7 +307,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
int shader_time_index,
bool use_legacy_snorm_formula)
: vec4_visitor(compiler, log_data, &key->tex, &vs_prog_data->base, shader,
- mem_ctx, false /* no_spills */, shader_time_index),
+ mem_ctx, false /* no_spills */, shader_time_index, NULL),
key(key),
vs_prog_data(vs_prog_data),
clip_planes(clip_planes),
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 967448e0e4..314bb13fbb 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -146,7 +146,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
brw_compute_vue_map(brw->intelScreen->devinfo,
&prog_data.base.vue_map, outputs_written,
- prog ? prog->SeparateShader : false);
+ prog ? prog->SeparateShader || prog->_LinkedShaders[MESA_SHADER_TESS_CTRL] : false);
if (0) {
_mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 6653a6d759..283999fc4b 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -60,6 +60,7 @@ gen6_upload_push_constants(struct brw_context *brw,
if (prog_data->nr_params == 0) {
stage_state->push_const_size = 0;
+ stage_state->push_const_offset = 0;
} else {
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
diff --git a/src/mesa/drivers/dri/i965/gen7_hs_state.c b/src/mesa/drivers/dri/i965/gen7_hs_state.c
index dc81884905..2456a15598 100644
--- a/src/mesa/drivers/dri/i965/gen7_hs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_hs_state.c
@@ -41,7 +41,33 @@ gen7_upload_hs_push_constants(struct brw_context *brw)
stage_state, AUB_TRACE_VS_CONSTANTS);
}
- gen7_upload_constant_state(brw, stage_state, hp, _3DSTATE_CONSTANT_HS);
+ assert(brw->gen == 8); // XXX: Do Gen7 code.
+ if (hp) {
+ BEGIN_BATCH(11);
+ OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (11 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(1 | (stage_state->push_const_size << 16));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_RELOC64(brw->hs.patch_vertices_in_bo, I915_GEM_DOMAIN_RENDER, 0,
+ brw->hs.patch_vertices_in_offset);
+ if (stage_state->push_const_size != 0) {
+ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
+ stage_state->push_const_offset);
+ } else {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(11);
+ OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (11 - 2));
+ for (int i = 1; i < 11; i++)
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
}
const struct brw_tracked_state gen7_hs_push_constants = {
@@ -50,6 +76,7 @@ const struct brw_tracked_state gen7_hs_push_constants = {
_NEW_TRANSFORM,
.brw = BRW_NEW_BATCH |
BRW_NEW_HS_PROG_DATA |
+ BRW_NEW_PRIMITIVE |
BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_TESS_CTRL_PROGRAM,
},
diff --git a/src/mesa/drivers/dri/i965/gen7_te_state.c b/src/mesa/drivers/dri/i965/gen7_te_state.c
index 95a5e98133..dfcccb238e 100644
--- a/src/mesa/drivers/dri/i965/gen7_te_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_te_state.c
@@ -29,19 +29,38 @@
static void
upload_te_state(struct brw_context *brw)
{
- /* Disable the TE */
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ bool active = brw->tess_eval_program;
+ if (active)
+ assert(brw->tess_ctrl_program);
+
+ if (active) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+ OUT_BATCH((brw->ds.prog_data->partitioning << GEN7_TE_PARTITIONING_SHIFT) |
+ (brw->ds.prog_data->output_topology << GEN7_TE_OUTPUT_TOPOLOGY_SHIFT) |
+ (brw->ds.prog_data->domain << GEN7_TE_DOMAIN_SHIFT) |
+ GEN7_TE_ENABLE);
+ OUT_BATCH_F(63.0);
+ OUT_BATCH_F(64.0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH_F(0);
+ OUT_BATCH_F(0);
+ ADVANCE_BATCH();
+ }
}
const struct brw_tracked_state gen7_te_state = {
.dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = _NEW_TRANSFORM,//??
+ .brw = BRW_NEW_BATCH | //??
+ BRW_NEW_CONTEXT |
+ BRW_NEW_DS_PROG_DATA |
+ BRW_NEW_TESS_EVAL_PROGRAM,
},
.emit = upload_te_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index fe0ecf8f40..219fdcaad1 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -184,6 +184,7 @@ gen7_upload_urb(struct brw_context *brw)
* skip the rest of the logic.
*/
if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) &&
+ !(brw->ctx.NewDriverState & BRW_NEW_HS_PROG_DATA) &&
brw->urb.vsize == vs_size &&
brw->urb.gs_present == gs_present &&
brw->urb.gsize == gs_size &&
diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c
index 31f1e273fd..45d3386881 100644
--- a/src/mesa/drivers/dri/i965/gen8_ds_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c
@@ -29,26 +29,68 @@
static void
gen8_upload_ds_state(struct brw_context *brw)
{
- /* Disable the DS Unit */
- BEGIN_BATCH(11);
- OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (11 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ struct gl_context *ctx = &brw->ctx;
+ const struct brw_stage_state *stage_state = &brw->ds.base;
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ bool active = brw->tess_eval_program;
+ assert(!active || brw->tess_ctrl_program);
+
+ /* BRW_NEW_DS_PROG_DATA */
+ const struct brw_vue_prog_data *prog_data = &brw->ds.prog_data->base;
+
+ if (active) {
+ /* DS will pull from the patch URB entry */
+ assert(!prog_data->urb_read_length);
+
+ BEGIN_BATCH(9);
+ OUT_BATCH(_3DSTATE_DS << 16 | (9 - 2));
+ OUT_BATCH(stage_state->prog_offset);
+ OUT_BATCH(0);
+ OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
+ GEN7_DS_SAMPLER_COUNT) |
+ SET_FIELD(brw->ds.prog_data->base.base.binding_table.size_bytes / 4,
+ GEN7_DS_BINDING_TABLE_ENTRY_COUNT));
+ if (prog_data->base.total_scratch) {
+ OUT_RELOC64(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->base.total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+ OUT_BATCH(SET_FIELD(prog_data->base.dispatch_grf_start_reg,
+ GEN7_DS_DISPATCH_START_GRF));
+
+ OUT_BATCH(GEN7_DS_ENABLE |
+ GEN7_DS_STATISTICS_ENABLE |
+ (brw->max_ds_threads - 1) << HSW_DS_MAX_THREADS_SHIFT |
+ (brw->ds.prog_data->domain == tri ? GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
+ OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled,
+ GEN8_DS_USER_CLIP_DISTANCE));
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(9);
+ OUT_BATCH(_3DSTATE_DS << 16 | (9 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ brw->ds.enabled = active;
}
const struct brw_tracked_state gen8_ds_state = {
.dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = _NEW_TRANSFORM,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_CONTEXT |
+ BRW_NEW_DS_PROG_DATA |
+ BRW_NEW_TESS_EVAL_PROGRAM,
},
.emit = gen8_upload_ds_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen8_hs_state.c b/src/mesa/drivers/dri/i965/gen8_hs_state.c
index 13f70ef0a8..bd51aeb374 100644
--- a/src/mesa/drivers/dri/i965/gen8_hs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_hs_state.c
@@ -29,43 +29,70 @@
static void
gen8_upload_hs_state(struct brw_context *brw)
{
- /* Disable the HS Unit */
- BEGIN_BATCH(11);
- OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (11 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ const struct brw_stage_state *stage_state = &brw->hs.base;
+ /* BRW_NEW_TESS_CTRL_PROGRAM */
+ bool active = brw->tess_ctrl_program;
+ assert(!active || brw->tess_eval_program);
+ /* BRW_NEW_HS_PROG_DATA */
+ const struct brw_vue_prog_data *prog_data = &brw->hs.prog_data->base;
- BEGIN_BATCH(9);
- OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
- OUT_BATCH(brw->hw_bt_pool.next_offset);
- ADVANCE_BATCH();
+ if (active) {
+ int num_instances = brw->hs.prog_data->instances;
+ /* XXX: If the HS kernel uses a barrier function, software must
+ * restrict the Instance Count to the number of threads that can be
+ * simultaneously active within a half-slice. Factors which must be
+ * considered includes scratch memory availability.
+ */
+ BEGIN_BATCH(9);
+ OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2));
+ OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
+ GEN7_HS_SAMPLER_COUNT) |
+ SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
+ GEN7_HS_BINDING_TABLE_ENTRY_COUNT));
+ OUT_BATCH(GEN7_HS_ENABLE |
+ GEN7_HS_STATISTICS_ENABLE |
+ (brw->max_hs_threads - 1) << GEN8_HS_MAX_THREADS_SHIFT |
+ SET_FIELD(num_instances - 1, GEN7_HS_INSTANCE_COUNT));
+ OUT_BATCH(stage_state->prog_offset);
+ OUT_BATCH(0);
+ if (prog_data->base.total_scratch) {
+ OUT_RELOC64(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->base.total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+ /* Decrement dispatch_grf_start_reg by 1 for gl_PatchVerticesIn hack */
+ OUT_BATCH(GEN7_HS_SINGLE_PROGRAM_FLOW |
+ GEN7_HS_INCLUDE_VERTEX_HANDLES |
+ SET_FIELD(prog_data->base.dispatch_grf_start_reg - 1,
+ GEN7_HS_DISPATCH_START_GRF));
+ OUT_BATCH(0); /* MBZ */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(9);
+ OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ brw->hs.enabled = active;
}
const struct brw_tracked_state gen8_hs_state = {
.dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = _NEW_TRANSFORM,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_CONTEXT |
+ BRW_NEW_HS_PROG_DATA |
+ BRW_NEW_TESS_CTRL_PROGRAM,
},
.emit = gen8_upload_hs_state,
};