summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r600/evergreen_compute_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/evergreen_compute_internal.c')
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.c830
1 files changed, 830 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
new file mode 100644
index 00000000000..209f064d1de
--- /dev/null
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -0,0 +1,830 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <adam.rak@streamnovation.com>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "r600.h"
+#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
+#include "r600_formats.h"
+#include "evergreend.h"
+#include "evergreen_compute_internal.h"
+#include "r600_hw_context_priv.h"
+
+int get_compute_resource_num(void)
+{
+ int num = 0;
+#define DECL_COMPUTE_RESOURCE(name, n) num += n;
+#include "compute_resource.def"
+#undef DECL_COMPUTE_RESOURCE
+ return num;
+}
+
+void evergreen_emit_raw_value(
+ struct evergreen_compute_resource* res,
+ unsigned value)
+{
+ res->cs[res->cs_end++] = value;
+}
+
+void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
+{
+ ctx->cs->buf[ctx->cs->cdw++] = value;
+}
+
+void evergreen_mult_reg_set_(
+ struct evergreen_compute_resource* res,
+ int index,
+ u32* array,
+ int size)
+{
+ int i = 0;
+
+ evergreen_emit_raw_reg_set(res, index, size / 4);
+
+ for (i = 0; i < size; i+=4) {
+ res->cs[res->cs_end++] = array[i / 4];
+ }
+}
+
+void evergreen_reg_set(
+ struct evergreen_compute_resource* res,
+ unsigned index,
+ unsigned value)
+{
+ evergreen_emit_raw_reg_set(res, index, 1);
+ res->cs[res->cs_end++] = value;
+}
+
+struct evergreen_compute_resource* get_empty_res(
+ struct r600_pipe_compute* pipe,
+ enum evergreen_compute_resources res_code,
+ int offset_index)
+{
+ int code_index = -1;
+ int code_size = -1;
+
+ {
+ int i = 0;
+ #define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n;
+ #include "compute_resource.def"
+ #undef DECL_COMPUTE_RESOURCE
+ }
+
+ assert(code_index != -1 && "internal error: resouce index not found");
+ assert(offset_index < code_size && "internal error: overindexing resource");
+
+ int index = code_index + offset_index;
+
+ struct evergreen_compute_resource* res = &pipe->resources[index];
+
+ res->enabled = true;
+ res->bo = NULL;
+ res->cs_end = 0;
+ bzero(&res->do_reloc, sizeof(res->do_reloc));
+
+ return res;
+}
+
+void evergreen_emit_raw_reg_set(
+ struct evergreen_compute_resource* res,
+ unsigned index,
+ int num)
+{
+ res->enabled = 1;
+ int cs_end = res->cs_end;
+
+ if (index >= EVERGREEN_CONFIG_REG_OFFSET
+ && index < EVERGREEN_CONFIG_REG_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
+ && index < EVERGREEN_CONTEXT_REG_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_RESOURCE_OFFSET
+ && index < EVERGREEN_RESOURCE_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_SAMPLER_OFFSET
+ && index < EVERGREEN_SAMPLER_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CTL_CONST_OFFSET
+ && index < EVERGREEN_CTL_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
+ && index < EVERGREEN_LOOP_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
+ && index < EVERGREEN_BOOL_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
+ } else {
+ res->cs[cs_end] = PKT0(index, num-1);
+ res->cs_end--;
+ }
+
+ res->cs_end += 2;
+}
+
+void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
+{
+ res->do_reloc[res->cs_end] += 1;
+}
+
+void evergreen_emit_ctx_reg_set(
+ struct r600_context *ctx,
+ unsigned index,
+ int num)
+{
+
+ if (index >= EVERGREEN_CONFIG_REG_OFFSET
+ && index < EVERGREEN_CONFIG_REG_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
+ && index < EVERGREEN_CONTEXT_REG_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_RESOURCE_OFFSET
+ && index < EVERGREEN_RESOURCE_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_SAMPLER_OFFSET
+ && index < EVERGREEN_SAMPLER_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CTL_CONST_OFFSET
+ && index < EVERGREEN_CTL_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
+ && index < EVERGREEN_LOOP_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
+ && index < EVERGREEN_BOOL_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
+ } else {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
+ }
+}
+
+void evergreen_emit_ctx_reloc(
+ struct r600_context *ctx,
+ struct r600_resource *bo,
+ enum radeon_bo_usage usage)
+{
+ assert(bo);
+
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ u32 rr = r600_context_bo_reloc(ctx, bo, usage);
+ ctx->cs->buf[ctx->cs->cdw++] = rr;
+}
+
+void evergreen_set_buffer_sync(
+ struct r600_context *ctx,
+ struct r600_resource* bo,
+ int size,
+ int flags,
+ enum radeon_bo_usage usage)
+{
+ assert(bo);
+ int32_t cp_coher_size = 0;
+
+ if (size == 0xffffffff || size == 0) {
+ cp_coher_size = 0xffffffff;
+ }
+ else {
+ cp_coher_size = ((size + 255) >> 8);
+ }
+
+ uint32_t sync_flags = 0;
+
+ if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
+ sync_flags |= S_0085F0_TC_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
+ sync_flags |= S_0085F0_VC_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
+ sync_flags |= S_0085F0_SH_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
+ sync_flags |= S_0085F0_CB_ACTION_ENA(1);
+
+ switch((flags >> 8) & 0xF) {
+ case 0:
+ sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
+ break;
+ case 1:
+ sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
+ break;
+ case 2:
+ sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
+ break;
+ case 3:
+ sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
+ break;
+ case 4:
+ sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
+ break;
+ case 5:
+ sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
+ break;
+ case 6:
+ sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
+ break;
+ case 7:
+ sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
+ break;
+ case 8:
+ sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
+ break;
+ case 9:
+ sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
+ break;
+ case 10:
+ sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
+ break;
+ case 11:
+ sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ int32_t poll_interval = 10;
+
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
+ ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
+ ctx->cs->buf[ctx->cs->cdw++] = 0;
+ ctx->cs->buf[ctx->cs->cdw++] = poll_interval;
+
+ if (cp_coher_size != 0xffffffff) {
+ evergreen_emit_ctx_reloc(ctx, bo, usage);
+ }
+}
+
+int evergreen_compute_get_gpu_format(
+ struct number_type_and_format* fmt,
+ struct r600_resource *bo)
+{
+ switch (bo->b.b.format)
+ {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_UNORM:
+ fmt->format = V_028C70_COLOR_32;
+ fmt->number_type = V_028C70_NUMBER_UNORM;
+ fmt->num_format_all = 0;
+ break;
+ case PIPE_FORMAT_R32_FLOAT:
+ fmt->format = V_028C70_COLOR_32_FLOAT;
+ fmt->number_type = V_028C70_NUMBER_FLOAT;
+ fmt->num_format_all = 0;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
+ fmt->number_type = V_028C70_NUMBER_FLOAT;
+ fmt->num_format_all = 0;
+ break;
+
+ ///TODO: other formats...
+
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+void evergreen_set_rat(
+ struct r600_pipe_compute *pipe,
+ int id,
+ struct r600_resource* bo,
+ int start,
+ int size)
+{
+ assert(id < 12);
+ assert((size & 3) == 0);
+ assert((start & 0xFF) == 0);
+
+ int offset;
+ COMPUTE_DBG("bind rat: %i \n", id);
+
+ if (id < 8) {
+ offset = id*0x3c;
+ }
+ else {
+ offset = 8*0x3c + (id-8)*0x1c;
+ }
+
+ int linear = 0;
+
+ if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
+ && bo->b.b.target == PIPE_BUFFER) {
+ linear = 1;
+ }
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);
+
+ evergreen_emit_force_reloc(res);
+
+ evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
+ evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);
+
+ struct number_type_and_format fmt;
+
+ ///default config
+ if (bo->b.b.format == PIPE_FORMAT_NONE) {
+ fmt.format = V_028C70_COLOR_32;
+ fmt.number_type = V_028C70_NUMBER_FLOAT;
+ } else {
+ evergreen_compute_get_gpu_format(&fmt, bo);
+ }
+
+ if (linear) {
+ evergreen_reg_set(res,
+ R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
+ | S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
+ | S_028C70_FORMAT(fmt.format)
+ | S_028C70_NUMBER_TYPE(fmt.number_type)
+ );
+ evergreen_emit_force_reloc(res);
+ } else {
+ assert(0 && "TODO");
+ ///TODO
+// evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
+// evergreen_emit_force_reloc(res);
+ }
+
+ evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
+ evergreen_emit_force_reloc(res);
+
+ if (linear) {
+ /* XXX: Why are we using size instead of bo->b.b.b.width0 ? */
+ evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
+ } else {
+ evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
+ S_028C78_WIDTH_MAX(bo->b.b.width0)
+ | S_028C78_HEIGHT_MAX(bo->b.b.height0));
+ }
+
+ if (id < 8) {
+ evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
+ evergreen_emit_force_reloc(res);
+ evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
+ evergreen_emit_force_reloc(res);
+ }
+
+ evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);
+
+ res->bo = bo;
+ res->usage = RADEON_USAGE_READWRITE;
+ res->coher_bo_size = size;
+ res->flags = COMPUTE_RES_CB_FLUSH(id);
+}
+
+void evergreen_set_lds(
+ struct r600_pipe_compute *pipe,
+ int num_lds,
+ int size,
+ int num_waves)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
+
+ evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
+ S_008E2C_NUM_LS_LDS(num_lds));
+ evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
+}
+
+void evergreen_set_gds(
+ struct r600_pipe_compute *pipe,
+ uint32_t addr,
+ uint32_t size)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);
+
+ evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
+ evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr);
+ evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size);
+}
+
+void evergreen_set_export(
+ struct r600_pipe_compute *pipe,
+ struct r600_resource* bo,
+ int offset, int size)
+{
+ #define SX_MEMORY_EXPORT_BASE 0x9010
+ #define SX_MEMORY_EXPORT_SIZE 0x9014
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);
+
+ evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);
+
+ if (size) {
+ evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
+ res->bo = bo;
+ res->usage = RADEON_USAGE_WRITE;
+ res->coher_bo_size = size;
+ res->flags = 0;
+ }
+}
+
+void evergreen_set_loop_const(
+ struct r600_pipe_compute *pipe,
+ int id, int count, int init, int inc) {
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);
+
+ assert(id < 32);
+ assert(count <= 0xFFF);
+ assert(init <= 0xFF);
+ assert(inc <= 0xFF);
+
+ /* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
+ * SQ_LOOP_CONST_191 */
+ evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
+ count | init << 12 | inc << 24);
+}
+
+void evergreen_set_tmp_ring(
+ struct r600_pipe_compute *pipe,
+ struct r600_resource* bo,
+ int offset, int size, int se)
+{
+ #define SQ_LSTMP_RING_BASE 0x00008e10
+ #define SQ_LSTMP_RING_SIZE 0x00008e14
+ #define GRBM_GFX_INDEX 0x802C
+ #define INSTANCE_INDEX(x) ((x) << 0)
+ #define SE_INDEX(x) ((x) << 16)
+ #define INSTANCE_BROADCAST_WRITES (1 << 30)
+ #define SE_BROADCAST_WRITES (1 << 31)
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);
+
+ evergreen_reg_set(res,
+ GRBM_GFX_INDEX,INSTANCE_INDEX(0)
+ | SE_INDEX(se)
+ | INSTANCE_BROADCAST_WRITES);
+ evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);
+
+ if (size) {
+ assert(bo);
+
+ evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
+ res->bo = bo;
+ res->usage = RADEON_USAGE_WRITE;
+ res->coher_bo_size = 0;
+ res->flags = 0;
+ }
+
+ if (size) {
+ evergreen_emit_force_reloc(res);
+ }
+
+ evergreen_reg_set(res,
+ GRBM_GFX_INDEX,INSTANCE_INDEX(0)
+ | SE_INDEX(0)
+ | INSTANCE_BROADCAST_WRITES
+ | SE_BROADCAST_WRITES);
+}
+
+static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
+{
+ if (R600_BIG_ENDIAN) {
+ switch(colorformat) {
+ case V_028C70_COLOR_4_4:
+ return ENDIAN_NONE;
+
+ /* 8-bit buffers. */
+ case V_028C70_COLOR_8:
+ return ENDIAN_NONE;
+
+ /* 16-bit buffers. */
+ case V_028C70_COLOR_5_6_5:
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_4_4_4_4:
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_8_8:
+ return ENDIAN_8IN16;
+
+ /* 32-bit buffers. */
+ case V_028C70_COLOR_8_8_8_8:
+ case V_028C70_COLOR_2_10_10_10:
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ case V_028C70_COLOR_32_FLOAT:
+ case V_028C70_COLOR_16_16_FLOAT:
+ case V_028C70_COLOR_16_16:
+ return ENDIAN_8IN32;
+
+ /* 64-bit buffers. */
+ case V_028C70_COLOR_16_16_16_16:
+ case V_028C70_COLOR_16_16_16_16_FLOAT:
+ return ENDIAN_8IN16;
+
+ case V_028C70_COLOR_32_32_FLOAT:
+ case V_028C70_COLOR_32_32:
+ case V_028C70_COLOR_X24_8_32_FLOAT:
+ return ENDIAN_8IN32;
+
+ /* 96-bit buffers. */
+ case V_028C70_COLOR_32_32_32_FLOAT:
+ /* 128-bit buffers. */
+ case V_028C70_COLOR_32_32_32_32_FLOAT:
+ case V_028C70_COLOR_32_32_32_32:
+ return ENDIAN_8IN32;
+ default:
+ return ENDIAN_NONE; /* Unsupported. */
+ }
+ } else {
+ return ENDIAN_NONE;
+ }
+}
+
+static unsigned r600_tex_dim(unsigned dim)
+{
+ switch (dim) {
+ default:
+ case PIPE_TEXTURE_1D:
+ return V_030000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_030000_SQ_TEX_DIM_1D_ARRAY;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ return V_030000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_030000_SQ_TEX_DIM_2D_ARRAY;
+ case PIPE_TEXTURE_3D:
+ return V_030000_SQ_TEX_DIM_3D;
+ case PIPE_TEXTURE_CUBE:
+ return V_030000_SQ_TEX_DIM_CUBEMAP;
+ }
+}
+
+void evergreen_set_vtx_resource(
+ struct r600_pipe_compute *pipe,
+ struct r600_resource* bo,
+ int id, uint64_t offset, int writable)
+{
+ assert(id < 16);
+ uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
+ struct number_type_and_format fmt;
+
+ fmt.format = 0;
+
+ assert(bo->b.b.height0 <= 1);
+ assert(bo->b.b.depth0 <= 1);
+
+ int e = evergreen_compute_get_gpu_format(&fmt, bo);
+
+ assert(e && "unknown format");
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
+
+ unsigned size = bo->b.b.width0;
+ unsigned stride = 1;
+
+// size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
+// util_format_get_blocksize(bo->b.b.b.format));
+
+ COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n",
+ id, size, bo->b.b.width0);
+
+ sq_vtx_constant_word2 =
+ S_030008_BASE_ADDRESS_HI(offset >> 32) |
+ S_030008_STRIDE(stride) |
+ S_030008_DATA_FORMAT(fmt.format) |
+ S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
+ S_030008_ENDIAN_SWAP(0);
+
+ COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
+ stride, fmt.format, fmt.num_format_all);
+
+ sq_vtx_constant_word3 =
+ S_03000C_DST_SEL_X(0) |
+ S_03000C_DST_SEL_Y(1) |
+ S_03000C_DST_SEL_Z(2) |
+ S_03000C_DST_SEL_W(3);
+
+ sq_vtx_constant_word4 = 0;
+
+ evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
+ evergreen_emit_raw_value(res, (id+816)*32 >> 2);
+ evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff));
+ evergreen_emit_raw_value(res, size - 1);
+ evergreen_emit_raw_value(res, sq_vtx_constant_word2);
+ evergreen_emit_raw_value(res, sq_vtx_constant_word3);
+ evergreen_emit_raw_value(res, sq_vtx_constant_word4);
+ evergreen_emit_raw_value(res, 0);
+ evergreen_emit_raw_value(res, 0);
+ evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
+
+ res->bo = bo;
+
+ if (writable) {
+ res->usage = RADEON_USAGE_READWRITE;
+ }
+ else {
+ res->usage = RADEON_USAGE_READ;
+ }
+
+ res->coher_bo_size = size;
+ res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
+}
+
+void evergreen_set_tex_resource(
+ struct r600_pipe_compute *pipe,
+ struct r600_pipe_sampler_view* view,
+ int id)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
+ struct r600_resource_texture *tmp =
+ (struct r600_resource_texture*)view->base.texture;
+
+ unsigned format, endian;
+ uint32_t word4 = 0, yuv_format = 0, pitch = 0;
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ unsigned height, depth;
+
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = 2;
+ swizzle[3] = 3;
+
+ format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
+ view->base.format, swizzle, &word4, &yuv_format);
+
+ if (format == ~0) {
+ format = 0;
+ }
+
+ endian = r600_colorformat_endian_swap(format);
+
+ height = view->base.texture->height0;
+ depth = view->base.texture->depth0;
+
+ pitch = align(tmp->pitch_in_blocks[0] *
+ util_format_get_blockwidth(tmp->real_format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+
+ assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
+ assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
+
+ evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
+ evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
+ evergreen_emit_raw_value(res,
+ (S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
+ S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
+ S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
+ evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
+ S_030004_TEX_DEPTH(depth - 1) |
+ S_030004_ARRAY_MODE(array_mode)));
+ evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
+ evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
+ evergreen_emit_raw_value(res, (word4 |
+ S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
+ S_030010_ENDIAN_SWAP(endian) |
+ S_030010_BASE_LEVEL(0)));
+ evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
+ S_030014_BASE_ARRAY(0) |
+ S_030014_LAST_ARRAY(0)));
+ evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
+ evergreen_emit_raw_value(res,
+ S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
+ | S_03001C_DATA_FORMAT(format));
+
+ res->bo = (struct r600_resource*)view->base.texture;
+
+ res->usage = RADEON_USAGE_READ;
+
+ res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
+ res->flags = COMPUTE_RES_TC_FLUSH;
+
+ evergreen_emit_force_reloc(res);
+ evergreen_emit_force_reloc(res);
+}
+
+void evergreen_set_sampler_resource(
+ struct r600_pipe_compute *pipe,
+ struct compute_sampler_state *sampler,
+ int id)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);
+
+ unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;
+
+ evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
+ evergreen_emit_raw_value(res, (id + 90)*3);
+ evergreen_emit_raw_value(res,
+ S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
+ S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
+ S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
+ S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
+ );
+ evergreen_emit_raw_value(res,
+ S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
+ S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
+ );
+ evergreen_emit_raw_value(res,
+ S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
+ (sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+ S_03C008_TYPE(1)
+ );
+}
+
+void evergreen_set_const_cache(
+ struct r600_pipe_compute *pipe,
+ int cache_id,
+ struct r600_resource* cbo,
+ int size, int offset)
+{
+ #define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
+ #define SQ_ALU_CONST_CACHE_LS_0 0x00028f40
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);
+
+ assert(size < 0x200);
+ assert((offset & 0xFF) == 0);
+ assert(cache_id < 16);
+
+ evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size);
+ evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8);
+ res->bo = cbo;
+ res->usage = RADEON_USAGE_READ;
+ res->coher_bo_size = size;
+ res->flags = COMPUTE_RES_SH_FLUSH;
+}
+
+struct r600_resource* r600_compute_buffer_alloc_vram(
+ struct r600_screen *screen,
+ unsigned size)
+{
+ assert(size);
+
+ struct pipe_resource * buffer = pipe_buffer_create(
+ (struct pipe_screen*) screen,
+ PIPE_BIND_CUSTOM,
+ PIPE_USAGE_IMMUTABLE,
+ size);
+
+ return (struct r600_resource *)buffer;
+}