summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/nv50/nv50_screen.c
diff options
context:
space:
mode:
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>2015-10-14 21:42:41 +0200
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>2015-11-14 23:42:15 +0100
commitff72440b40211326eda118232fabd53965410afd (patch)
treea86202d500d81b667c11266e536fcfaad4e00ce2 /src/gallium/drivers/nouveau/nv50/nv50_screen.c
parent7167a058baa2b524e31e51d8924fe5d0ea661b70 (diff)
nv50: implement a basic compute support
This adds the ability to launch simple compute kernels like the one I will use to read out MP performance counters in the upcoming patch. This compute support is based on the work of Francisco Jerez (aka curro) that he did as part of his EVoC project in 2011/2012 to get OpenCL working on Tesla. His original work can be found here: https://github.com/curro/mesa/commits/nv50-compute I did some improvements on the original code, like fixing using both 3D and COMPUTE simultaneously, improving global buffers binding, and making the code closer to what nvc0 already does. This compute support has been tested by Pierre Moreau and myself with some compute kernels. This is a step towards OpenCL. Speaking about this, it seems like compute programs overlap fragment programs when they are used both. To fix this, we need to re-validate fragment programs when binding compute programs and vice versa. Note that, textures, samplers and surfaces still need to be implemented. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Tested-by: Pierre Moreau <pierre.morrow@free.fr> Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50/nv50_screen.c')
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c61
1 files changed, 58 insertions, 3 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..0142e86ba20 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
break;
default:
return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
}
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 2 });
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t []) { 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t []) { 512, 512, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t []) { 512 });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+ RET((uint64_t []) { 1ULL << 32 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+ default:
+ return 0;
+ }
+
+#undef RET
+}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
@@ -742,6 +788,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
nv50_screen_init_resource_functions(pscreen);
@@ -851,6 +898,8 @@ nv50_screen_create(struct nouveau_device *dev)
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
@@ -902,6 +951,12 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;