summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r--src/gallium/drivers/r600/Makefile3
-rw-r--r--src/gallium/drivers/r600/SConscript1
-rw-r--r--src/gallium/drivers/r600/eg_asm.c49
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h2
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c380
-rw-r--r--src/gallium/drivers/r600/r600.h14
-rw-r--r--src/gallium/drivers/r600/r600_asm.c1689
-rw-r--r--src/gallium/drivers/r600/r600_asm.h34
-rw-r--r--src/gallium/drivers/r600/r600_blit.c55
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c144
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c55
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h61
-rw-r--r--src/gallium/drivers/r600/r600_resource.h65
-rw-r--r--src/gallium/drivers/r600/r600_shader.c564
-rw-r--r--src/gallium/drivers/r600/r600_shader.h4
-rw-r--r--src/gallium/drivers/r600/r600_sq.h6
-rw-r--r--src/gallium/drivers/r600/r600_state.c288
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c76
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h1
-rw-r--r--src/gallium/drivers/r600/r600_texture.c245
-rw-r--r--src/gallium/drivers/r600/r600_translate.c60
-rw-r--r--src/gallium/drivers/r600/r600_upload.c114
-rw-r--r--src/gallium/drivers/r600/r700_asm.c11
24 files changed, 2648 insertions, 1277 deletions
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index a484f38e9f1..b476b9af3b8 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
evergreen_state.c \
eg_asm.c \
r600_translate.c \
- r600_state_common.c
+ r600_state_common.c \
+ r600_upload.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 3fc1fa94c27..64980140963 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -28,6 +28,7 @@ r600 = env.ConvenienceLibrary(
'r600_state_common.c',
'r600_texture.c',
'r600_translate.c',
+ 'r600_upload.c',
'r700_asm.c',
'evergreen_state.c',
'eg_asm.c',
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 21d66fa9564..67d742b3760 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -27,6 +27,7 @@
#include "r600_asm.h"
#include "eg_sq.h"
#include "r600_opcodes.h"
+#include "evergreend.h"
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -34,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
@@ -89,3 +92,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
}
return 0;
}
+
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+ (r600_bo_offset(ve->fetch_shader)) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index 698299ec134..ecea1db4f15 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -290,6 +290,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
+ return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_R8_UNORM:
@@ -312,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_SWAP_STD;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0509522d813..94eef77945b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -410,9 +410,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
- S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
+ S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- S_030014_LAST_LEVEL(state->last_level) |
+ S_030014_LAST_LEVEL(state->u.tex.last_level) |
S_030014_BASE_ARRAY(0) |
S_030014_LAST_ARRAY(0), 0xffffffff, NULL);
r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL);
@@ -633,10 +633,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct r600_surface *surf;
- unsigned level = state->cbufs[cb]->level;
+ unsigned level = state->cbufs[cb]->u.tex.level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
+ unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
@@ -647,6 +648,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ level, state->cbufs[cb]->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
@@ -666,7 +670,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
- (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate,
R_028C78_CB_COLOR0_DIM + cb * 0x3C,
0x0, 0xFFFFFFFF, NULL);
@@ -698,11 +702,12 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
struct r600_surface *surf;
unsigned level;
unsigned pitch, slice, format, stencil_format;
+ unsigned offset;
if (state->zsbuf == NULL)
return;
- level = state->zsbuf->level;
+ level = state->zsbuf->u.tex.level;
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
@@ -712,24 +717,27 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
rtex->depth = 1;
rbuffer = &rtex->resource;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
+ level, state->zsbuf->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
if (stencil_format) {
uint32_t stencil_offset;
stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
- (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
- (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
}
r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
@@ -762,8 +770,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
@@ -825,6 +831,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+ if (state->zsbuf) {
+ evergreen_polygon_offset_update(rctx);
+ }
}
static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
@@ -832,6 +842,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+ uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
@@ -840,6 +851,8 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
return;
}
+ r600_upload_const_buffer(rctx, buffer, &offset);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
rctx->vs_const_buffer.nregs = 0;
@@ -849,7 +862,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
break;
case PIPE_SHADER_FRAGMENT:
@@ -860,7 +873,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
break;
default:
@@ -1057,12 +1070,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
+ case CHIP_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 10;
+ num_gs_threads = 10;
+ num_es_threads = 10;
+ num_hs_threads = 10;
+ num_ls_threads = 10;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
}
tmp = 0x00000000;
switch (family) {
case CHIP_CEDAR:
case CHIP_PALM:
+ case CHIP_CAICOS:
break;
default:
tmp |= S_008C00_VC_ENABLE(1);
@@ -1194,29 +1271,101 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
-r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
- 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
+void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state state;
+
+ state.id = R600_PIPE_STATE_POLYGON_OFFSET;
+ state.nregs = 0;
+ if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
+ float offset_units = rctx->rasterizer->offset_units;
+ unsigned offset_db_fmt_cntl = 0, depth;
+
+ switch (rctx->framebuffer.zsbuf->texture->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ depth = -24;
+ offset_units *= 2.0f;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ depth = -23;
+ offset_units *= 1.0f;
+ offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ depth = -16;
+ offset_units *= 4.0f;
+ break;
+ default:
+ return;
+ }
+ /* FIXME some of those reg can be computed with cso */
+ offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ r600_pipe_state_add_reg(&state,
+ R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &state);
+ }
+}
+
+static void evergreen_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate;
struct r600_resource *rbuffer;
- unsigned i, j, offset, prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
struct pipe_vertex_buffer *vertex_buffer;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
- struct r600_drawl draw;
- boolean translate = FALSE;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
r600_begin_vertex_translate(rctx);
- translate = TRUE;
+ return;
}
if (rctx->any_user_vbs) {
@@ -1224,6 +1373,72 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
rctx->any_user_vbs = FALSE;
}
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+}
+
+int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
+void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource *rbuffer;
+ u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+ struct r600_draw rdraw;
+ struct r600_pipe_state vgt;
+ struct r600_drawl draw;
+ unsigned prim;
+
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
@@ -1272,48 +1487,23 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
if (r600_conv_pipe_prim(draw.mode, &prim))
return;
-
- /* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
return;
- if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
return;
-
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- uint32_t word3, word2;
- uint32_t format;
- rstate = &rctx->vs_resource[i];
-
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
-
- format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
-
- word2 = format | S_030008_STRIDE(vertex_buffer->stride);
-
- word3 = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W);
-
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
- evergreen_fs_resource_set(&rctx->ctx, rstate, i);
}
+ evergreen_spi_update(rctx);
+
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
mask |= (0xF << (i * 4));
@@ -1328,46 +1518,6 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
-
- if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
- float offset_units = rctx->rasterizer->offset_units;
- unsigned offset_db_fmt_cntl = 0, depth;
-
- switch (rctx->framebuffer.zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- return;
- }
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt,
- R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
- }
r600_context_pipe_state_set(&rctx->ctx, &vgt);
rdraw.vgt_num_indices = draw.count;
@@ -1382,28 +1532,22 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
evergreen_context_draw(&rctx->ctx, &rdraw);
- if (translate)
- r600_end_vertex_translate(rctx);
-
pipe_resource_reference(&draw.index_buffer, NULL);
}
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
unsigned spi_baryc_cntl;
- /* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
/* evergreen NUM_INTERP only contains values interpolated into the LDS,
POSITION goes via GPRs from the SC so isn't counted */
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
@@ -1421,16 +1565,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
if (rshader->input[i].centroid)
have_centroid = TRUE;
}
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
@@ -1569,14 +1703,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
R_028864_SQ_PGM_RESOURCES_2_VS,
0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A8_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_02885C_SQ_PGM_START_VS,
(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_START_FS,
- (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch);
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 2ab60f3086a..a852bef6156 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -35,7 +35,7 @@
#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
typedef uint64_t u64;
typedef uint32_t u32;
@@ -92,6 +92,9 @@ enum radeon_family {
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
+ CHIP_BARTS,
+ CHIP_TURKS,
+ CHIP_CAICOS,
CHIP_LAST,
};
@@ -245,10 +248,7 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
- unsigned fence;
struct list_head fenced_bo;
- unsigned *cfence;
- struct r600_bo *fence_bo;
};
struct r600_draw {
@@ -284,14 +284,12 @@ void r600_context_queries_resume(struct r600_context *ctx);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-
void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
+struct radeon *radeon_decref(struct radeon *radeon);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index edadedff25f..beefd17ae2f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -22,59 +22,122 @@
*/
#include <stdio.h>
#include <errno.h>
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
#include "r600_asm.h"
+#include "r600_formats.h"
+#include "r600d.h"
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
+static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
{
if(alu->is_op3)
return 3;
- switch (alu->inst) {
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- return 2;
-
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- };
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ switch (alu->inst) {
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ return 2;
+
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ case CHIPREV_EVERGREEN:
+ switch (alu->inst) {
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
+ return 2;
+
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ }
return 3;
}
@@ -101,7 +164,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
if (alu == NULL)
return NULL;
LIST_INITHEAD(&alu->list);
- LIST_INITHEAD(&alu->bs_list);
return alu;
}
@@ -152,6 +214,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
break;
default:
@@ -189,221 +254,813 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000
- SQ_ALU_VEC_120, //001
- SQ_ALU_VEC_102, //010
+/* alu instructions that can ony exits once per group */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ }
+}
- SQ_ALU_VEC_201, //011
- SQ_ALU_VEC_012, //100
- SQ_ALU_VEC_021, //101
+static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ }
+}
- SQ_ALU_VEC_012, //110
- SQ_ALU_VEC_012}; //111
+static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ }
+}
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000
- SQ_ALU_SCL_122, //001
- SQ_ALU_SCL_122, //010
+/* alu instructions that can only execute on the vector unit */
+static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return is_alu_reduction_inst(bc, alu) ||
+ is_alu_mova_inst(bc, alu);
+}
- SQ_ALU_SCL_221, //011
- SQ_ALU_SCL_212, //100
- SQ_ALU_SCL_122, //101
+/* alu instructions that can only execute on the trans unit */
+static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ if (!alu->is_op3)
+ return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+ case CHIPREV_EVERGREEN:
+ default:
+ if (!alu->is_op3)
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
+ }
+}
+
+/* alu instructions that can execute on any unit */
+static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return !is_alu_vec_unit_inst(bc, alu) &&
+ !is_alu_trans_unit_inst(bc, alu);
+}
+
+static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
+ struct r600_bc_alu *assignment[5])
+{
+ struct r600_bc_alu *alu;
+ unsigned i, chan, trans;
+
+ for (i = 0; i < 5; i++)
+ assignment[i] = NULL;
+
+ for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+ chan = alu->dst.chan;
+ if (is_alu_trans_unit_inst(bc, alu))
+ trans = 1;
+ else if (is_alu_vec_unit_inst(bc, alu))
+ trans = 0;
+ else if (assignment[chan])
+ trans = 1; // assume ALU_INST_PREFER_VECTOR
+ else
+ trans = 0;
- SQ_ALU_SCL_122, //110
- SQ_ALU_SCL_122}; //111
+ if (trans) {
+ if (assignment[4]) {
+ assert(0); //ALU.Trans has already been allocated
+ return -1;
+ }
+ assignment[4] = alu;
+ } else {
+ if (assignment[chan]) {
+ assert(0); //ALU.chan has already been allocated
+ return -1;
+ }
+ assignment[chan] = alu;
+ }
-static int init_gpr(struct r600_bc_alu *alu)
+ if (alu->last)
+ break;
+ }
+ return 0;
+}
+
+struct alu_bank_swizzle {
+ int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ int hw_cfile_addr[4];
+ int hw_cfile_elem[4];
+};
+
+const unsigned cycle_for_bank_swizzle_vec[][3] = {
+ [SQ_ALU_VEC_012] = { 0, 1, 2 },
+ [SQ_ALU_VEC_021] = { 0, 2, 1 },
+ [SQ_ALU_VEC_120] = { 1, 2, 0 },
+ [SQ_ALU_VEC_102] = { 1, 0, 2 },
+ [SQ_ALU_VEC_201] = { 2, 0, 1 },
+ [SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+const unsigned cycle_for_bank_swizzle_scl[][3] = {
+ [SQ_ALU_SCL_210] = { 2, 1, 0 },
+ [SQ_ALU_SCL_122] = { 1, 2, 2 },
+ [SQ_ALU_SCL_212] = { 2, 1, 2 },
+ [SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
{
- int cycle, component;
+ int i, cycle, component;
/* set up gpr use */
for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
for (component = 0; component < NUM_OF_COMPONENTS; component++)
- alu->hw_gpr[cycle][component] = -1;
- return 0;
+ bs->hw_gpr[cycle][component] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_addr[i] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_elem[i] = -1;
}
-
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
{
- if (alu->hw_gpr[cycle][chan] < 0)
- alu->hw_gpr[cycle][chan] = sel;
- else if (alu->hw_gpr[cycle][chan] != (int)sel) {
- R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+ if (bs->hw_gpr[cycle][chan] == -1)
+ bs->hw_gpr[cycle][chan] = sel;
+ else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+ // Another scalar operation has already used GPR read port for channel
return -1;
}
return 0;
}
-
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
- int table[3];
- int ret = 0;
- switch (swiz) {
- case SQ_ALU_SCL_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_122:
- table[0] = 1; table[1] = 2; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_212:
- table[0] = 2; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_221:
- table[0] = 2; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- break;
- default:
- R600_ERR("bad scalar bank swizzle value\n");
- ret = -1;
- break;
+
+static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+{
+ int res, resmatch = -1, resempty = -1;
+ for (res = 3; res >= 0; --res) {
+ if (bs->hw_cfile_addr[res] == -1)
+ resempty = res;
+ else if (bs->hw_cfile_addr[res] == sel &&
+ bs->hw_cfile_elem[res] == chan)
+ resmatch = res;
}
- return ret;
-}
-
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
- int table[3];
- int ret;
-
- switch (swiz) {
- case SQ_ALU_VEC_012:
- table[0] = 0; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_021:
- table[0] = 0; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_120:
- table[0] = 1; table[1] = 2; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_102:
- table[0] = 1; table[1] = 0; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_201:
- table[0] = 2; table[1] = 0; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- default:
- R600_ERR("bad vector bank swizzle value\n");
- ret = -1;
- break;
+ if (resmatch != -1)
+ return 0; // Read for this scalar element already reserved, nothing to do here.
+ else if (resempty != -1) {
+ bs->hw_cfile_addr[resempty] = sel;
+ bs->hw_cfile_elem[resempty] = chan;
+ } else {
+ // All cfile read ports are used, cannot reference vector element
+ return -1;
}
- return ret;
+ return 0;
+}
+
+static int is_gpr(unsigned sel)
+{
+ return (sel >= 0 && sel <= 127);
}
+/* CB constants start at 512, and get translated to a kcache index when ALU
+ * clauses are constructed. Note that we handle kcache constants the same way
+ * as (the now gone) cfile constants, is that really required? */
+static int is_cfile(unsigned sel)
+{
+ return (sel > 255 && sel < 512) ||
+ (sel > 511 && sel < 4607) || // Kcache before translate
+ (sel > 127 && sel < 192); // Kcache after translate
+}
+
+static int is_const(int sel)
+{
+ return is_cfile(sel) ||
+ (sel >= V_SQ_ALU_SRC_0 &&
+ sel <= V_SQ_ALU_SRC_LITERAL);
+}
+
+static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; src++) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+ if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+ // Nothing to do; special-case optimization,
+ // second source uses first source’s reservation
+ continue;
+ else {
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ } else if (is_cfile(sel)) {
+ r = reserve_cfile(bs, sel, elem);
+ if (r)
+ return r;
+ }
+ // No restrictions on PV, PS, literal or special constants
+ }
+ return 0;
+}
+
+static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, const_count, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (const_count = 0, src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_const(sel)) { // Any constant, including literal and inline constants
+ if (const_count >= 2)
+ // More than two references to a constant in
+ // transcendental operation.
+ return -1;
+ else
+ const_count++;
+ }
+ if (is_cfile(sel)) {
+ r = reserve_cfile(bs, sel, elem);
+ if (r)
+ return r;
+ }
+ }
+ for (src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+ if (cycle < const_count)
+ // Cycle for GPR load conflicts with
+ // constant load in transcendental operation.
+ return -1;
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ // Constants already processed
+ // No restrictions on PV, PS
+ }
+ return 0;
+}
-
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+static int check_and_set_bank_swizzle(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5])
{
- int num_src;
- int i;
- int channel_swizzle;
+ struct alu_bank_swizzle bs;
+ int bank_swizzle[5];
+ int i, r = 0, forced = 0;
+
+ for (i = 0; i < 5; i++)
+ if (slots[i] && slots[i]->bank_swizzle_force) {
+ slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+ forced = 1;
+ }
- num_src = r600_bc_get_num_operands(alu);
+ if (forced)
+ return 0;
- for (i = 0; i < num_src; i++) {
- channel_swizzle = alu->src[i].chan;
- if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
- chan_counter[channel_swizzle]++;
+ // just check every possible combination of bank swizzle
+ // not very efficent, but works on the first try in most of the cases
+ for (i = 0; i < 4; i++)
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ bank_swizzle[4] = SQ_ALU_SCL_210;
+ while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+ init_bank_swizzle(&bs);
+ for (i = 0; i < 4; i++) {
+ if (slots[i]) {
+ r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
+ if (r)
+ break;
+ }
+ }
+ if (!r && slots[4]) {
+ r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
+ }
+ if (!r) {
+ for (i = 0; i < 5; i++) {
+ if (slots[i])
+ slots[i]->bank_swizzle = bank_swizzle[i];
+ }
+ return 0;
+ }
+
+ for (i = 0; i < 5; i++) {
+ bank_swizzle[i]++;
+ if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+ break;
+ else
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ }
}
+
+ // couldn't find a working swizzle
+ return -1;
}
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int replace_gpr_with_pv_ps(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
{
- struct r600_bc_alu *alu;
- int chan_counter[4] = { 0 };
+ struct r600_bc_alu *prev[5];
+ int gpr[5], chan[5];
+ int i, j, r, src, num_src;
- update_chan_counter(alu_first, chan_counter);
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- update_chan_counter(alu, chan_counter);
+ for (i = 0; i < 5; ++i) {
+ if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+ gpr[i] = prev[i]->dst.sel;
+ if (is_alu_reduction_inst(bc, prev[i]))
+ chan[i] = 0;
+ else
+ chan[i] = prev[i]->dst.chan;
+ } else
+ gpr[i] = -1;
}
- if (chan_counter[0] > 3 ||
- chan_counter[1] > 3 ||
- chan_counter[2] > 3 ||
- chan_counter[3] > 3) {
- R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
- alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
- return -1;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu = slots[i];
+ if(!alu)
+ continue;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+ continue;
+
+ if (alu->src[src].sel == gpr[4] &&
+ alu->src[src].chan == chan[4]) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PS;
+ alu->src[src].chan = 0;
+ continue;
+ }
+
+ for (j = 0; j < 4; ++j) {
+ if (alu->src[src].sel == gpr[j] &&
+ alu->src[src].chan == j) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PV;
+ alu->src[src].chan = chan[j];
+ break;
+ }
+ }
+ }
}
+
return 0;
}
-#endif
-static int is_const(int sel)
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
- if (sel > 255 && sel < 512)
- return 1;
- if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
- return 1;
- return 0;
+ switch(value) {
+ case 0:
+ *sel = V_SQ_ALU_SRC_0;
+ break;
+ case 1:
+ *sel = V_SQ_ALU_SRC_1_INT;
+ break;
+ case -1:
+ *sel = V_SQ_ALU_SRC_M_1_INT;
+ break;
+ case 0x3F800000: // 1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ break;
+ case 0x3F000000: // 0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ break;
+ case 0xBF800000: // -1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ *neg ^= 1;
+ break;
+ case 0xBF000000: // -0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ *neg ^= 1;
+ break;
+ default:
+ *sel = V_SQ_ALU_SRC_LITERAL;
+ break;
+ }
}
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* compute how many literal are needed */
+static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned *nliteral)
{
- unsigned swizzle_key;
-
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
- return 0;
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value[alu->src[i].chan];
+ unsigned found = 0;
+ for (j = 0; j < *nliteral; ++j) {
+ if (literal[j] == value) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ if (*nliteral >= 4)
+ return -EINVAL;
+ literal[(*nliteral)++] = value;
+ }
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
-
- alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
return 0;
}
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
+ struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
+{
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value[alu->src[i].chan];
+ for (j = 0; j < nliteral; ++j) {
+ if (literal[j] == value) {
+ alu->src[i].chan = j;
+ break;
+ }
+ }
+ }
+ }
+}
+
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
+ struct r600_bc_alu *alu_prev)
{
- unsigned swizzle_key;
+ struct r600_bc_alu *prev[5];
+ struct r600_bc_alu *result[5] = { NULL };
+
+ uint32_t literal[4];
+ unsigned nliteral = 0;
+
+ int i, j, r, src, num_src;
+ int num_once_inst = 0;
+
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu;
+
+ /* check number of literals */
+ if (prev[i] && r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+ return 0;
+ if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+ return 0;
+
+ // let's check used slots
+ if (prev[i] && !slots[i]) {
+ result[i] = prev[i];
+ num_once_inst += is_alu_once_inst(bc, prev[i]);
+ continue;
+ } else if (prev[i] && slots[i]) {
+ if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+ // trans unit is still free try to use it
+ if (is_alu_any_unit_inst(bc, slots[i])) {
+ result[i] = prev[i];
+ result[4] = slots[i];
+ } else if (is_alu_any_unit_inst(bc, prev[i])) {
+ result[i] = slots[i];
+ result[4] = prev[i];
+ } else
+ return 0;
+ } else
+ return 0;
+ } else if(!slots[i]) {
+ continue;
+ } else
+ result[i] = slots[i];
+
+ // let's check source gprs
+ alu = slots[i];
+ num_once_inst += is_alu_once_inst(bc, alu);
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ // constants doesn't matter
+ if (!is_gpr(alu->src[src].sel))
+ continue;
+
+ for (j = 0; j < 5; ++j) {
+ if (!prev[j] || !prev[j]->dst.write)
+ continue;
+
+ // if it's relative then we can't determin which gpr is really used
+ if (prev[j]->dst.chan == alu->src[src].chan &&
+ (prev[j]->dst.sel == alu->src[src].sel ||
+ prev[j]->dst.rel || alu->src[src].rel))
+ return 0;
+ }
+ }
+ }
+
+ /* more than one PRED_ or KILL_ ? */
+ if (num_once_inst > 1)
return 0;
+
+ /* check if the result can still be swizzlet */
+ r = check_and_set_bank_swizzle(bc, result);
+ if (r)
+ return 0;
+
+ /* looks like everything worked out right, apply the changes */
+
+ /* sort instructions */
+ for (i = 0; i < 5; ++i) {
+ slots[i] = result[i];
+ if (result[i]) {
+ LIST_DEL(&result[i]->list);
+ result[i]->last = 0;
+ LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ }
+ }
+
+ /* determine new last instruction */
+ LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+ /* determine new first instruction */
+ for (i = 0; i < 5; ++i) {
+ if (result[i]) {
+ bc->cf_last->curr_bs_head = result[i];
+ break;
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
- alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
+ bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+ bc->cf_last->prev2_bs_head = NULL;
+
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/* This code handles kcache lines as single blocks of 32 constants. We could
+ * probably do slightly better by recognizing that we actually have two
+ * consecutive lines of 16 constants, but the resulting code would also be
+ * somewhat more complicated. */
+static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
{
- struct r600_bc_alu *alu = NULL;
- int num_instr = 1;
+ struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+ unsigned int required_lines;
+ unsigned int free_lines = 0;
+ unsigned int cache_line[3];
+ unsigned int count = 0;
+ unsigned int i, j;
+ int r;
+
+ /* Collect required cache lines. */
+ for (i = 0; i < 3; ++i) {
+ bool found = false;
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ line = ((alu->src[i].sel - 512) / 32) * 2;
+
+ for (j = 0; j < count; ++j) {
+ if (cache_line[j] == line) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ cache_line[count++] = line;
+ }
- init_gpr(alu_first);
+ /* This should never actually happen. */
+ if (count >= 3) return -ENOMEM;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- num_instr++;
+ for (i = 0; i < 2; ++i) {
+ if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) {
+ ++free_lines;
+ }
}
- if (num_instr == 1) {
- check_scalar(bc, alu_first);
-
- } else {
-/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
- check_vector(bc, alu_first);
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- check_vector(bc, alu);
+ /* Filter lines pulled in by previous intructions. Note that this is
+ * only for the required_lines count, we can't remove these from the
+ * cache_line array since we may have to start a new ALU clause. */
+ for (i = 0, required_lines = count; i < count; ++i) {
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ --required_lines;
+ break;
+ }
+ }
+ }
+
+ /* Start a new ALU clause if needed. */
+ if (required_lines > free_lines) {
+ if ((r = r600_bc_add_cf(bc))) {
+ return r;
+ }
+ bc->cf_last->inst = (type << 3);
+ kcache = bc->cf_last->kcache;
+ }
+
+ /* Setup the kcache lines. */
+ for (i = 0; i < count; ++i) {
+ bool found = false;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) continue;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) {
+ kcache[j].bank = 0;
+ kcache[j].addr = cache_line[i];
+ kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2;
+ break;
+ }
+ }
+ }
+
+ /* Alter the src operands to refer to the kcache. */
+ for (i = 0; i < 3; ++i) {
+ static const unsigned int base[] = {128, 160, 256, 288};
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ alu->src[i].sel -= 512;
+ line = (alu->src[i].sel / 32) * 2;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == line) {
+ alu->src[i].sel &= 0x1f;
+ alu->src[i].sel += base[j];
+ break;
+ }
}
}
+
return 0;
}
@@ -416,62 +1073,89 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
if (nalu == NULL)
return -ENOMEM;
memcpy(nalu, alu, sizeof(struct r600_bc_alu));
- nalu->nliteral = 0;
+
+ if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+ /* check if we could add it anyway */
+ if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
+ type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+ LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
+ if (lalu->predicate) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ } else
+ bc->force_add_cf = 1;
+ }
/* cf can contains only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
- bc->force_add_cf) {
+ if (bc->cf_last == NULL || bc->force_add_cf) {
r = r600_bc_add_cf(bc);
if (r) {
free(nalu);
return r;
}
- bc->cf_last->inst = (type << 3);
}
+ bc->cf_last->inst = (type << 3);
+
+ /* Setup the kcache for this ALU instruction. This will start a new
+ * ALU clause if needed. */
+ if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+ free(nalu);
+ return r;
+ }
+
if (!bc->cf_last->curr_bs_head) {
bc->cf_last->curr_bs_head = nalu;
- LIST_INITHEAD(&nalu->bs_list);
- } else {
- LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
}
- /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
+ /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
* worst case */
- if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
+ if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) {
bc->force_add_cf = 1;
}
/* number of gpr == the last gpr used in any alu */
for (i = 0; i < 3; i++) {
- if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
- bc->ngpr = alu->src[i].sel + 1;
- }
- /* compute how many literal are needed
- * either 2 or 4 literals
- */
- if (alu->src[i].sel == 253) {
- if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
- nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
- }
- }
- }
- if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
- lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!lalu->last && lalu->nliteral > nalu->nliteral) {
- nalu->nliteral = lalu->nliteral;
+ if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+ bc->ngpr = nalu->src[i].sel + 1;
}
+ if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+ r600_bc_special_constants(
+ nalu->src[i].value[nalu->src[i].chan],
+ &nalu->src[i].sel, &nalu->src[i].neg);
}
- if (alu->dst.sel >= bc->ngpr) {
- bc->ngpr = alu->dst.sel + 1;
+ if (nalu->dst.sel >= bc->ngpr) {
+ bc->ngpr = nalu->dst.sel + 1;
}
LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
- bc->cf_last->kcache0_mode = 2;
-
/* process cur ALU instructions for bank swizzle */
- if (alu->last) {
- check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+ if (nalu->last) {
+ struct r600_bc_alu *slots[5];
+ r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
+ if (r)
+ return r;
+
+ if (bc->cf_last->prev_bs_head) {
+ r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ if (bc->cf_last->prev_bs_head) {
+ r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ r = check_and_set_bank_swizzle(bc, slots);
+ if (r)
+ return r;
+
+ bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+ bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
bc->cf_last->curr_bs_head = NULL;
}
return 0;
@@ -482,44 +1166,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
-{
- struct r600_bc_alu *alu;
-
- if (bc->cf_last == NULL) {
- return 0;
- }
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
- return 0;
- }
- /* all same on EG */
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
- return 0;
- }
- /* same on EG */
- if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
- (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
- LIST_IS_EMPTY(&bc->cf_last->alu)) {
- R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
- return -EINVAL;
- }
- alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral || alu->literal_added) {
- return 0;
- }
- memcpy(alu->value, value, 4 * 4);
- bc->cf_last->ndw += alu->nliteral;
- bc->ndw += alu->nliteral;
- alu->literal_added = 1;
- return 0;
-}
-
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
{
struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -545,7 +1191,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) > 7)
bc->force_add_cf = 1;
return 0;
}
@@ -570,11 +1216,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
}
bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
}
+ if (ntex->src_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->src_gpr + 1;
+ }
+ if (ntex->dst_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->dst_gpr + 1;
+ }
LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) > 7)
bc->force_add_cf = 1;
return 0;
}
@@ -670,8 +1322,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
/* r600 only, r700/eg bits in r700_asm.c */
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
/* don't replace gpr by pv or ps for destination register */
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -702,19 +1352,12 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
@@ -726,15 +1369,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
@@ -790,8 +1435,10 @@ int r600_bc_build(struct r600_bc *bc)
struct r600_bc_alu *alu;
struct r600_bc_vtx *vtx;
struct r600_bc_tex *tex;
+ uint32_t literal[4];
+ unsigned nliteral;
unsigned addr;
- int r;
+ int i, r;
if (bc->callstack[0].max > 0)
bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -805,7 +1452,19 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ if (alu->last) {
+ cf->ndw += align(nliteral, 2);
+ nliteral = 0;
+ }
+ }
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -851,8 +1510,15 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
switch(bc->chiprev) {
case CHIPREV_R600:
r = r600_bc_alu_build(bc, alu, addr);
@@ -869,7 +1535,10 @@ int r600_bc_build(struct r600_bc *bc)
return r;
addr += 2;
if (alu->last) {
- addr += alu->nliteral;
+ for (i = 0; i < align(nliteral, 2); ++i) {
+ bc->bytecode[addr++] = literal[i];
+ }
+ nliteral = 0;
}
}
break;
@@ -947,3 +1616,447 @@ void r600_bc_clear(struct r600_bc *bc)
LIST_INITHEAD(&cf->list);
}
+
+void r600_bc_dump(struct r600_bc *bc)
+{
+ struct r600_bc_cf *cf = NULL;
+ struct r600_bc_alu *alu = NULL;
+ struct r600_bc_vtx *vtx = NULL;
+ struct r600_bc_tex *tex = NULL;
+
+ unsigned i, id;
+ uint32_t literal[4];
+ unsigned nliteral;
+ char chip = '6';
+
+ switch (bc->chiprev) {
+ case 1:
+ chip = '7';
+ break;
+ case 2:
+ chip = 'E';
+ break;
+ case 0:
+ default:
+ chip = '6';
+ break;
+ }
+ fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr);
+ fprintf(stderr, " %c\n", chip);
+
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id;
+
+ switch (cf->inst) {
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d ", cf->addr);
+ fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+ fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+ fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+ id++;
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+ fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+ fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->addr);
+ id++;
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+ break;
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "GPR:%X ", cf->output.gpr);
+ fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+ fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+ fprintf(stderr, "TYPE:%X\n", cf->output.type);
+ id++;
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+ fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+ fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+ fprintf(stderr, "INST:%d ", cf->output.inst);
+ fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+ case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+ id++;
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COND:%X ", cf->cond);
+ fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+ break;
+ }
+
+ id = cf->addr;
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
+ fprintf(stderr, "REL:%d ", alu->src[0].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
+ fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
+ fprintf(stderr, "REL:%d ", alu->src[1].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+ fprintf(stderr, "LAST:%d)\n", alu->last);
+ id++;
+ fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
+ fprintf(stderr, "INST:%d ", alu->inst);
+ fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+ fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+ fprintf(stderr, "REL:%d ", alu->dst.rel);
+ fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+ fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
+ if (alu->is_op3) {
+ fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
+ fprintf(stderr, "REL:%d ", alu->src[2].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
+ fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
+ } else {
+ fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
+ fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
+ fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+ fprintf(stderr, "OMOD:%d ", alu->omod);
+ fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
+ fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
+ }
+
+ id++;
+ if (alu->last) {
+ for (i = 0; i < nliteral; i++, id++) {
+ float *f = (float*)(bc->bytecode + id);
+ fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
+ }
+ id += nliteral & 1;
+ nliteral = 0;
+ }
+ }
+
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ //TODO
+ }
+
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ //TODO
+ }
+ }
+
+ fprintf(stderr, "--------------------------------------\n");
+}
+
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT((count - 8) - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+ unsigned *num_format, unsigned *format_comp)
+{
+ const struct util_format_description *desc;
+ unsigned i;
+
+ *format = 0;
+ *num_format = 0;
+ *format_comp = 0;
+
+ desc = util_format_description(pformat);
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ goto out_unknown;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[i].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16_FLOAT;
+ break;
+ case 2:
+ *format = FMT_16_16_FLOAT;
+ break;
+ case 3:
+ *format = FMT_16_16_16_FLOAT;
+ break;
+ case 4:
+ *format = FMT_16_16_16_16_FLOAT;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32_FLOAT;
+ break;
+ case 2:
+ *format = FMT_32_32_FLOAT;
+ break;
+ case 3:
+ *format = FMT_32_32_32_FLOAT;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32_FLOAT;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[i].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_8;
+ break;
+ case 2:
+ *format = FMT_8_8;
+ break;
+ case 3:
+ // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_8_8_8_8;
+ break;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16;
+ break;
+ case 2:
+ *format = FMT_16_16;
+ break;
+ case 3:
+ // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_16_16_16_16;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32;
+ break;
+ case 2:
+ *format = FMT_32_32;
+ break;
+ case 3:
+ *format = FMT_32_32_32;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ *format_comp = 1;
+ }
+ if (desc->channel[i].normalized) {
+ *num_format = 0;
+ } else {
+ *num_format = 2;
+ }
+ return;
+out_unknown:
+ R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
+}
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
+{
+ unsigned ndw, i;
+ u32 *bytecode;
+ unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ struct pipe_vertex_element *elements = ve->elements;
+ const struct util_format_description *desc;
+
+ /* 2 dwords for cf aligned to 4 + 4 dwords per input */
+ ndw = 8 + ve->count * 4;
+ ve->fs_size = ndw * 4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ } else {
+ r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ fetch_resource_start = 160;
+ }
+
+ /* vertex elements offset need special handling, if offset is bigger
+ * than what we can put in fetch instruction then we need to alterate
+ * the vertex resource offset. In such case in order to simplify code
+ * we will bound one resource per elements. It's a worst case scenario.
+ */
+ for (i = 0; i < ve->count; i++) {
+ ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
+ if (ve->vbuffer_offset[i]) {
+ ve->vbuffer_need_offset = 1;
+ }
+ }
+
+ for (i = 0; i < ve->count; i++) {
+ unsigned vbuffer_index;
+ r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
+ desc = util_format_description(ve->hw_format[i]);
+ if (desc == NULL) {
+ R600_ERR("unknown format %d\n", ve->hw_format[i]);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -EINVAL;
+ }
+
+ /* see above for vbuffer_need_offset explanation */
+ vbuffer_index = elements[i].vertex_buffer_index;
+ if (ve->vbuffer_need_offset) {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
+ } else {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ }
+ bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
+ S_SQ_VTX_WORD0_SRC_SEL_X(0) |
+ S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
+ bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
+ S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
+ S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
+ S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
+ S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
+ S_SQ_VTX_WORD1_DATA_FORMAT(format) |
+ S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
+ S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
+ S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
+ S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
+ bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bytecode[8 + i * 4 + 3] = 0;
+ }
+ r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f2016af3e72..278b4466cb0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,8 +25,8 @@
#include "util/u_double_list.h"
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
+struct r600_vertex_element;
+struct r600_pipe_context;
struct r600_bc_alu_src {
unsigned sel;
@@ -34,6 +34,7 @@ struct r600_bc_alu_src {
unsigned neg;
unsigned abs;
unsigned rel;
+ u32 *value;
};
struct r600_bc_alu_dst {
@@ -46,19 +47,15 @@ struct r600_bc_alu_dst {
struct r600_bc_alu {
struct list_head list;
- struct list_head bs_list; /* bank swizzle list */
struct r600_bc_alu_src src[3];
struct r600_bc_alu_dst dst;
unsigned inst;
unsigned last;
unsigned is_op3;
unsigned predicate;
- unsigned nliteral;
- unsigned literal_added;
unsigned bank_swizzle;
unsigned bank_swizzle_force;
- u32 value[4];
- int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ unsigned omod;
};
struct r600_bc_tex {
@@ -122,6 +119,12 @@ struct r600_bc_output {
unsigned barrier;
};
+struct r600_bc_kcache {
+ unsigned bank;
+ unsigned mode;
+ unsigned addr;
+};
+
struct r600_bc_cf {
struct list_head list;
unsigned inst;
@@ -131,18 +134,15 @@ struct r600_bc_cf {
unsigned cond;
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
- unsigned kcache0_mode;
- unsigned kcache1_mode;
- unsigned kcache0_addr;
- unsigned kcache1_addr;
- unsigned kcache0_bank;
- unsigned kcache1_bank;
+ struct r600_bc_kcache kcache[2];
unsigned r6xx_uses_waterfall;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
struct r600_bc_alu *curr_bs_head;
+ struct r600_bc_alu *prev_bs_head;
+ struct r600_bc_alu *prev2_bs_head;
};
#define FC_NONE 0
@@ -188,18 +188,24 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
void r600_bc_clear(struct r600_bc *bc);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
+void r600_bc_dump(struct r600_bc *bc);
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 74cf9687999..b9ec9592e35 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -78,19 +78,24 @@ static void r600_blitter_end(struct pipe_context *ctx)
r600_context_queries_resume(&rctx->ctx);
}
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_surface *zsurf, *cbsurf;
+ struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
int level = 0;
float depth = 1.0f;
+ surf_tmpl.format = texture->resource.base.b.format;
+ surf_tmpl.u.tex.level = level;
+ surf_tmpl.u.tex.first_layer = 0;
+ surf_tmpl.u.tex.last_layer = 0;
+ surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
- PIPE_BIND_DEPTH_STENCIL);
+ zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl);
- cbsurf = ctx->screen->get_tex_surface(ctx->screen,
- (struct pipe_resource*)texture->flushed_depth_texture,
- 0, level, 0, PIPE_BIND_RENDER_TARGET);
+ surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
+ surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
+ cbsurf = ctx->create_surface(ctx,
+ (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl);
if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
@@ -102,9 +107,6 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
-
-
- return 0;
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
@@ -154,41 +156,38 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
/* Copy a block of pixels from one surface to another using HW. */
static void r600_hw_copy_region(struct pipe_context *ctx,
- struct pipe_resource *dst,
- struct pipe_subresource subdst,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_blitter_begin(ctx, R600_COPY);
- util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height,
- TRUE);
+ util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box, TRUE);
r600_blitter_end(ctx);
}
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
boolean is_depth;
/* there is something wrong with depth resource copies at the moment so avoid them for now */
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
if (is_depth)
- util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
else
- r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index a432271b82d..469c8195fe9 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,7 +29,6 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include <util/u_upload_mgr.h>
#include "state_tracker/drm_driver.h"
#include <xf86drm.h>
#include "radeon_drm.h"
@@ -53,12 +52,13 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
rbuffer->magic = R600_BUFFER_MAGIC;
rbuffer->user_buffer = NULL;
- rbuffer->num_ranges = 0;
rbuffer->r.base.b = *templ;
pipe_reference_init(&rbuffer->r.base.b.reference, 1);
rbuffer->r.base.b.screen = screen;
rbuffer->r.base.vtbl = &r600_buffer_vtbl;
rbuffer->r.size = rbuffer->r.base.b.width0;
+ rbuffer->r.bo_size = rbuffer->r.size;
+ rbuffer->uploaded = FALSE;
bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage);
if (bo == NULL) {
FREE(rbuffer);
@@ -89,10 +89,12 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
rbuffer->r.base.b.width0 = bytes;
rbuffer->r.base.b.height0 = 1;
rbuffer->r.base.b.depth0 = 1;
+ rbuffer->r.base.b.array_size = 1;
rbuffer->r.base.b.flags = 0;
- rbuffer->num_ranges = 0;
rbuffer->r.bo = NULL;
+ rbuffer->r.bo_size = 0;
rbuffer->user_buffer = ptr;
+ rbuffer->uploaded = FALSE;
return &rbuffer->r.base.b;
}
@@ -104,6 +106,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
if (rbuffer->r.bo) {
r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
+ rbuffer->r.bo = NULL;
FREE(rbuffer);
}
@@ -113,29 +116,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
int write = 0;
uint8_t *data;
- int i;
- boolean flush = FALSE;
if (rbuffer->user_buffer)
return (uint8_t*)rbuffer->user_buffer + transfer->box.x;
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuffer->num_ranges; i++) {
- if ((transfer->box.x >= rbuffer->ranges[i].start) &&
- (transfer->box.x < rbuffer->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL);
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys,
- rbuffer->r.base.b.width0, 0,
- rbuffer->r.base.b.bind,
- rbuffer->r.base.b.usage);
- break;
- }
- }
- }
if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) {
/* FIXME */
}
@@ -154,41 +138,22 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+ if (rbuffer->user_buffer)
+ return;
+
if (rbuffer->r.bo)
r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
}
static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
{
- struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
-
- assert(box->x + box->width <= transfer->box.width);
-
- if (rbuffer->user_buffer)
- return;
-
- /* mark the range as used */
- for(i = 0; i < rbuffer->num_ranges; ++i) {
- if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) {
- rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset);
- rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length));
- return;
- }
- }
-
- rbuffer->ranges[rbuffer->num_ranges].start = offset;
- rbuffer->ranges[rbuffer->num_ranges].end = offset+length;
- rbuffer->num_ranges++;
}
unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *buf,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
/* FIXME */
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
@@ -235,29 +200,25 @@ struct u_resource_vtbl r600_buffer_vtbl =
int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = draw->index_buffer_offset;
- int ret = 0;
-
if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- ret = u_upload_buffer(rctx->upload_ib,
- index_offset,
- draw->count * draw->index_size,
- draw->index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- draw->index_buffer_offset = index_offset;
-
- /* Transfer ownership. */
- pipe_resource_reference(&draw->index_buffer, upload_buffer);
- pipe_resource_reference(&upload_buffer, NULL);
+ struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
+ unsigned upload_offset;
+ int ret = 0;
+
+ ret = r600_upload_buffer(rctx->rupload_vb,
+ draw->index_buffer_offset,
+ draw->count * draw->index_size,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
+ if (ret)
+ return ret;
+ rbuffer->uploaded = TRUE;
+ draw->index_buffer_offset = upload_offset;
}
-done:
- return ret;
+ return 0;
}
int r600_upload_user_buffers(struct r600_pipe_context *rctx)
@@ -266,27 +227,52 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx)
int i, nr;
nr = rctx->vertex_elements->count;
+ nr = rctx->nvertex_buffer;
for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+ struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
+ struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer);
unsigned upload_offset;
- ret = u_upload_buffer(rctx->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
+
+ ret = r600_upload_buffer(rctx->rupload_vb,
+ 0, vb->buffer->width0,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
if (ret)
return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
+ rbuffer->uploaded = TRUE;
vb->buffer_offset = upload_offset;
}
}
return ret;
}
+
+
+int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer,
+ uint32_t *const_offset)
+{
+ if (r600_buffer_is_user_buffer(cbuffer)) {
+ struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer);
+ unsigned upload_offset;
+ int ret = 0;
+
+ ret = r600_upload_buffer(rctx->rupload_const,
+ 0, cbuffer->width0,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
+ if (ret)
+ return ret;
+ rbuffer->uploaded = TRUE;
+ *const_offset = upload_offset;
+ return 0;
+ }
+
+ *const_offset = 0;
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 2ee0c83e5d3..a85d0bbf1e1 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -330,10 +330,14 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C
+/* TODO Fill in more ALU */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 511e52fbce1..20838e4d98f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -35,7 +35,6 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -59,9 +58,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
if (!rctx->ctx.pm4_cdwords)
return;
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
#if 0
sprintf(dname, "gallium-%08d.bof", dc);
if (dc < 20) {
@@ -71,6 +67,9 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
dc++;
#endif
r600_context_flush(&rctx->ctx);
+
+ r600_upload_flush(rctx->rupload_vb);
+ r600_upload_flush(rctx->rupload_const);
}
static void r600_destroy_context(struct pipe_context *context)
@@ -79,6 +78,8 @@ static void r600_destroy_context(struct pipe_context *context)
rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
+ r600_end_vertex_translate(rctx);
+
r600_context_fini(&rctx->ctx);
util_blitter_destroy(rctx->blitter);
@@ -87,8 +88,8 @@ static void r600_destroy_context(struct pipe_context *context)
free(rctx->states[i]);
}
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
+ r600_upload_destroy(rctx->rupload_vb);
+ r600_upload_destroy(rctx->rupload_const);
if (rctx->tran.translate_cache)
translate_cache_destroy(rctx->tran.translate_cache);
@@ -120,6 +121,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
r600_init_blit_functions(rctx);
r600_init_query_functions(rctx);
r600_init_context_resource_functions(rctx);
+ r600_init_surface_functions(rctx);
switch (r600_get_family(rctx->radeon)) {
case CHIP_R600:
@@ -148,6 +150,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
rctx->context.draw_vbo = evergreen_draw;
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
@@ -162,16 +167,14 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
+ rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16);
+ if (rctx->rupload_vb == NULL) {
r600_destroy_context(&rctx->context);
return NULL;
}
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
+ rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256);
+ if (rctx->rupload_const == NULL) {
r600_destroy_context(&rctx->context);
return NULL;
}
@@ -206,8 +209,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
else
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
-
return &rctx->context;
}
@@ -240,6 +241,9 @@ static const char *r600_get_family_name(enum radeon_family family)
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
default: return "AMD unknown";
}
}
@@ -254,6 +258,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
@@ -286,7 +293,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 14;
+ if (family >= CHIP_CEDAR)
+ return 15;
+ else
+ return 14;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
/* FIXME allow this once infrastructure is there */
return 16;
@@ -315,12 +325,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
switch (param) {
case PIPE_CAP_MAX_LINE_WIDTH:
case PIPE_CAP_MAX_LINE_WIDTH_AA:
case PIPE_CAP_MAX_POINT_WIDTH:
case PIPE_CAP_MAX_POINT_WIDTH_AA:
- return 8192.0f;
+ if (family >= CHIP_CEDAR)
+ return 16384.0f;
+ else
+ return 8192.0f;
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
@@ -407,9 +423,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
}
if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED)) &&
r600_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
@@ -467,7 +483,6 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
rscreen->screen.get_paramf = r600_get_paramf;
rscreen->screen.is_format_supported = r600_is_format_supported;
rscreen->screen.context_create = r600_create_context;
- r600_init_screen_texture_functions(&rscreen->screen);
r600_init_screen_resource_functions(&rscreen->screen);
rscreen->tiling_info = r600_get_tiling_info(radeon);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ba9fedf0b6c..2112a40f696 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -53,6 +53,8 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_CONSTANT,
R600_PIPE_STATE_SAMPLER,
R600_PIPE_STATE_RESOURCE,
+ R600_PIPE_STATE_POLYGON_OFFSET,
+ R600_PIPE_STATE_FETCH_SHADER,
R600_PIPE_NSTATES
};
@@ -86,7 +88,15 @@ struct r600_vertex_element
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ boolean incompatible_layout;
+ struct r600_bo *fetch_shader;
+ unsigned fs_size;
+ struct r600_pipe_state rstate;
+ /* if offset is to big for fetch instructio we need to alterate
+ * offset of vertex buffer, record here the offset need to add
+ */
+ unsigned vbuffer_need_offset;
+ unsigned vbuffer_offset[PIPE_MAX_ATTRIBS];
};
struct r600_pipe_shader {
@@ -101,29 +111,31 @@ struct r600_pipe_shader {
#define NUM_TEX_UNITS 16
struct r600_textures_info {
- struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
- unsigned n_views;
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ unsigned n_views;
void *samplers[NUM_TEX_UNITS];
- unsigned n_samplers;
+ unsigned n_samplers;
};
+/* vertex buffer translation context, used to translate vertex input that
+ * hw doesn't natively support, so far only FLOAT64 is unsupported.
+ */
struct r600_translate_context {
/* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
-
+ struct translate_cache *translate_cache;
/* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
- /* Saved and new vertex element state. */
- void *saved_velems, *new_velems;
+ unsigned vb_slot;
+ void *new_velems;
};
#define R600_CONSTANT_ARRAY_SIZE 256
#define R600_RESOURCE_ARRAY_SIZE 160
+struct r600_upload;
+
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
- struct pipe_framebuffer_state *pframebuffer;
unsigned family;
void *custom_dsa_flush;
struct r600_screen *screen;
@@ -140,6 +152,7 @@ struct r600_pipe_context {
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
+ unsigned nvs_resource;
struct r600_pipe_state *vs_resource;
struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
@@ -151,14 +164,12 @@ struct r600_pipe_context {
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
+ struct r600_upload *rupload_vb;
unsigned any_user_vbs;
- struct r600_textures_info ps_samplers;
-
- unsigned vb_max_index;
- struct r600_translate_context tran;
-
+ struct r600_textures_info ps_samplers;
+ unsigned vb_max_index;
+ struct r600_translate_context tran;
+ struct r600_upload *rupload_const;
};
struct r600_drawl {
@@ -181,10 +192,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
+void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
/* r600_buffer.c */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
@@ -194,7 +207,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
unsigned bind);
unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *buf,
- unsigned face, unsigned level);
+ unsigned level, int layer);
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
@@ -207,7 +220,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
@@ -218,14 +231,20 @@ void r600_init_state_functions(struct r600_pipe_context *rctx);
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
void r600_init_config(struct r600_pipe_context *rctx);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
+void r600_polygon_offset_update(struct r600_pipe_context *rctx);
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
+
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
+void r600_init_surface_functions(struct r600_pipe_context *r600);
uint32_t r600_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
+unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
+ unsigned level, unsigned layer);
/* r600_translate.c */
void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
@@ -252,13 +271,13 @@ void r600_sampler_view_destroy(struct pipe_context *ctx,
void r600_bind_state(struct pipe_context *ctx, void *state);
void r600_delete_state(struct pipe_context *ctx, void *state);
void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
-
void *r600_create_shader_state(struct pipe_context *ctx,
const struct pipe_shader_state *state);
void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+
/*
* common helpers
*/
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 7a2d1f44122..28b3e1e5e40 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -46,6 +46,7 @@ struct r600_resource {
struct u_resource base;
struct r600_bo *bo;
u32 size;
+ unsigned bo_size;
};
struct r600_resource_texture {
@@ -61,7 +62,21 @@ struct r600_resource_texture {
unsigned tile_type;
unsigned depth;
unsigned dirty;
- struct r600_resource_texture *flushed_depth_texture;
+ struct r600_resource_texture *flushed_depth_texture;
+};
+
+#define R600_BUFFER_MAGIC 0xabcd1600
+
+struct r600_resource_buffer {
+ struct r600_resource r;
+ uint32_t magic;
+ void *user_buffer;
+ bool uploaded;
+};
+
+struct r600_surface {
+ struct pipe_surface base;
+ unsigned aligned_height;
};
void r600_init_screen_resource_functions(struct pipe_screen *screen);
@@ -73,46 +88,29 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *base,
struct winsys_handle *whandle);
-#define R600_BUFFER_MAGIC 0xabcd1600
-#define R600_BUFFER_MAX_RANGES 32
-
-struct r600_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-struct r600_resource_buffer {
- struct r600_resource r;
- uint32_t magic;
- void *user_buffer;
- struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES];
- unsigned num_ranges;
-};
-
/* r600_buffer */
static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer)
{
if (buffer) {
assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC);
return (struct r600_resource_buffer *)buffer;
- }
- return NULL;
+ }
+ return NULL;
}
static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer)
{
- return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
+ if (r600_buffer(buffer)->uploaded)
+ return FALSE;
+ return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
}
-int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture);
-
-extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
+int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture);
/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box);
void r600_texture_transfer_destroy(struct pipe_context *ctx,
@@ -122,9 +120,16 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
-struct r600_surface {
- struct pipe_surface base;
- unsigned aligned_height;
-};
-
+struct r600_pipe_context;
+struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
+ unsigned default_size,
+ unsigned alignment);
+void r600_upload_flush(struct r600_upload *upload);
+void r600_upload_destroy(struct r600_upload *upload);
+int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
+ unsigned size, struct r600_resource_buffer *in_buffer,
+ unsigned *out_offset, unsigned *out_size,
+ struct r600_bo **out_buffer);
+
+int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset);
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 77b180984dd..e85e829badd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -44,6 +44,9 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
rstate->nregs = 0;
/* so far never got proper semantic id from tgsi */
+ /* FIXME better to move this in config things so they get emited
+ * only one time per cs
+ */
for (i = 0; i < 10; i++) {
spi_vs_out_id[i] = 0;
}
@@ -67,20 +70,11 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
S_028868_STACK_SIZE(rshader->bc.nstack),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_0288D0_SQ_PGM_CF_OFFSET_VS,
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_028858_SQ_PGM_START_VS,
r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028894_SQ_PGM_START_FS,
- r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
r600_pipe_state_add_reg(rstate,
R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -104,37 +98,20 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
int pos_index = -1, face_index = -1;
- /* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
pos_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
face_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
+
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
r600_pipe_state_add_reg(rstate,
@@ -210,22 +187,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
0xFFFFFFFF, NULL);
}
-static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
void *ptr;
/* copy new shader */
- if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
- shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
- if (shader->bo_fetch == NULL) {
- return -ENOMEM;
- }
- ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
- memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
- r600_bo_unmap(rctx->radeon, shader->bo_fetch);
- }
if (shader->bo == NULL) {
shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
if (shader->bo == NULL) {
@@ -236,7 +204,6 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
r600_bo_unmap(rctx->radeon, shader->bo);
}
/* build state */
- rshader->flat_shade = rctx->flatshade;
switch (rshader->processor_type) {
case TGSI_PROCESSOR_VERTEX:
if (rshader->family >= CHIP_CEDAR) {
@@ -255,116 +222,51 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
default:
return -EINVAL;
}
- r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
return 0;
}
-static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_shader *shader = &rshader->shader;
- const struct util_format_description *desc;
- enum pipe_format resource_format[160];
- unsigned i, nresources = 0;
- struct r600_bc *bc = &shader->bc_fetch;
- struct r600_bc_cf *cf;
- struct r600_bc_vtx *vtx;
-
- if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
- return 0;
- /* doing a full memcmp fell over the refcount */
- if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
- (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
- rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
- return 0;
- }
- rshader->vertex_elements = *rctx->vertex_elements;
- for (i = 0; i < rctx->vertex_elements->count; i++) {
- resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
- }
- r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
- LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
- switch (cf->inst) {
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
- desc = util_format_description(resource_format[vtx->buffer_id]);
- if (desc == NULL) {
- R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
- return -EINVAL;
- }
- vtx->dst_sel_x = desc->swizzle[0];
- vtx->dst_sel_y = desc->swizzle[1];
- vtx->dst_sel_z = desc->swizzle[2];
- vtx->dst_sel_w = desc->swizzle[3];
- }
- break;
- default:
- break;
- }
- }
- return r600_bc_build(&shader->bc_fetch);
-}
-
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- int r;
-
- if (shader == NULL)
- return -EINVAL;
- /* there should be enough input */
- if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, shader->shader.bc.nresource);
- return -EINVAL;
- }
- r = r600_shader_update(ctx, shader);
- if (r)
- return r;
- return r600_pipe_shader(ctx, shader);
-}
-
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
+ static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ u32 *literals;
int r;
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ tgsi_dump(tokens, 0);
+ }
shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader);
+ r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
}
r = r600_bc_build(&shader->shader.bc);
+ free(literals);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
}
- if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_build(&shader->shader.bc_fetch);
- if (r) {
- R600_ERR("building bytecode failed !\n");
- return r;
- }
+ if (dump_shaders) {
+ r600_bc_dump(&shader->shader.bc);
+ fprintf(stderr, "______________________________________________________________\n");
}
-//fprintf(stderr, "______________________________________________________________\n");
- return 0;
+ return r600_pipe_shader(ctx, shader);
}
-void
-r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_bo_reference(rctx->radeon, &shader->bo, NULL);
-
r600_bc_clear(&shader->shader.bc);
-
- /* FIXME: is there more stuff to free? */
}
/*
@@ -381,9 +283,7 @@ struct r600_shader_ctx {
unsigned temp_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
- struct r600_bc *bc_fetch;
struct r600_shader *shader;
- u32 value[4];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -501,9 +401,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
- struct r600_bc_vtx vtx;
unsigned i;
- int r;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
@@ -513,26 +411,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- /* turn input into fetch */
- memset(&vtx, 0, sizeof(struct r600_bc_vtx));
- vtx.inst = 0;
- vtx.fetch_type = 0;
- vtx.buffer_id = i;
- /* register containing the index into the buffer */
- vtx.src_gpr = 0;
- vtx.src_sel_x = 0;
- vtx.mega_fetch_count = 0x1F;
- vtx.dst_gpr = ctx->shader->input[i].gpr;
- vtx.dst_sel_x = 0;
- vtx.dst_sel_y = 1;
- vtx.dst_sel_z = 2;
- vtx.dst_sel_w = 3;
- vtx.use_const_fields = 1;
- r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
- if (r)
- return r;
- }
if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
/* turn input into interpolate on EG */
if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
@@ -614,7 +492,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
{
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
@@ -624,7 +502,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
int i, r = 0, pos0;
ctx.bc = &shader->bc;
- ctx.bc_fetch = &shader->bc_fetch;
ctx.shader = shader;
r = r600_bc_init(ctx.bc, shader->family);
if (r)
@@ -634,19 +511,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
- if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_init(ctx.bc_fetch, shader->family);
- if (r)
- return r;
- ctx.bc_fetch->type = -1;
- }
ctx.bc->type = shader->processor_type;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
* Values [160,191] correspond to constant buffer bank 1
- * Values [256,511] correspond to cfile constants c[0..255].
+ * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
+ * Values [256,287] correspond to constant buffer bank 2 (EG)
+ * Values [288,319] correspond to constant buffer bank 3 (EG)
* Other special values are shown in the list below.
* 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
* 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
@@ -680,9 +553,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+ /* Outside the GPR range. This will be translated to one of the
+ * kcache banks later. */
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
- ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
+ ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
@@ -725,9 +600,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = ctx.inst_info->process(&ctx);
if (r)
goto out_err;
- r = r600_bc_add_literal(ctx.bc, ctx.value);
- if (r)
- goto out_err;
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -840,21 +714,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
}
}
- /* add return to fetch shader */
- if (ctx.type == TGSI_PROCESSOR_VERTEX) {
- if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
- r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- } else {
- r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- }
- }
/* add output to bytecode */
for (i = 0; i < noutput; i++) {
r = r600_bc_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
}
- free(ctx.literals);
+ *literals = ctx.literals;
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
@@ -878,22 +744,29 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
const struct tgsi_full_src_register *tgsi_src,
struct r600_bc_alu_src *r600_src)
{
- int index;
memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->sel = tgsi_src->Register.Index;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- r600_src->sel = 0;
- index = tgsi_src->Register.Index;
- ctx->value[0] = ctx->literals[index * 4 + 0];
- ctx->value[1] = ctx->literals[index * 4 + 1];
- ctx->value[2] = ctx->literals[index * 4 + 2];
- ctx->value[3] = ctx->literals[index * 4 + 3];
- }
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
r600_src->abs = tgsi_src->Register.Absolute;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return 0;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ r600_src->value = ctx->literals + index * 4;
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
return 0;
}
@@ -981,18 +854,19 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
int i, j, k, nliteral, r;
for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
nliteral++;
}
}
for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = r600_src[i].sel;
alu.src[0].chan = k;
+ alu.src[0].value = r600_src[i].value;
alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
@@ -1002,9 +876,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
- if (r)
- return r;
r600_src[i].sel = treg;
j--;
}
@@ -1012,19 +883,25 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
return 0;
}
-static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+static int tgsi_last_instruction(unsigned writemask)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
- struct r600_bc_alu alu;
- int i, j, r;
- int lasti = 0;
+ int i, lasti = 0;
for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ if (writemask & (1 << i)) {
lasti = i;
}
}
+ return lasti;
+}
+
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -1093,12 +970,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
struct r600_bc_alu_src r600_src[3])
{
+ static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float double_pi = 3.1415926535 * 2;
+ static float neg_pi = -3.1415926535;
+
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r;
- uint32_t lit_vals[4];
struct r600_bc_alu alu;
- memset(lit_vals, 0, 4*4);
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
@@ -1106,9 +985,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
- lit_vals[1] = fui(0.5f);
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1122,15 +998,13 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = (uint32_t *)&half_inv_pi;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
alu.src[2].chan = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
@@ -1146,14 +1020,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- if (ctx->bc->chiprev == CHIPREV_R600) {
- lit_vals[0] = fui(3.1415926535897f * 2.0f);
- lit_vals[1] = fui(-3.1415926535897f);
- } else {
- lit_vals[0] = fui(1.0f);
- lit_vals[1] = fui(-0.5f);
- }
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1169,13 +1035,20 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 1;
+
+ if (ctx->bc->chiprev == CHIPREV_R600) {
+ alu.src[1].value = (uint32_t *)&double_pi;
+ alu.src[2].value = (uint32_t *)&neg_pi;
+ } else {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].neg = 1;
+ }
+
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
return 0;
}
@@ -1185,7 +1058,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_setup_trig(ctx, r600_src);
if (r)
@@ -1205,10 +1078,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
return r;
/* replicate result */
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i))
- lasti = i;
- }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1295,10 +1164,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* dst.w = 1.0; */
@@ -1319,10 +1184,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return 0;
@@ -1358,9 +1219,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* kill must be last in ALU */
ctx->bc->force_add_cf = 1;
@@ -1423,10 +1281,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -1445,10 +1299,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
chan = alu.dst.chan;
sel = alu.dst.sel;
@@ -1471,9 +1321,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1517,9 +1364,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1568,9 +1412,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1594,12 +1435,9 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
if (r)
return r;
@@ -1611,9 +1449,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* POW(a,b) = EXP2(b * LOG2(a))*/
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1624,9 +1459,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1666,9 +1498,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst = (-tmp > 0 ? -1 : tmp) */
for (i = 0; i < 4; i++) {
@@ -1703,9 +1532,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
struct r600_bc_alu alu;
int i, r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1735,6 +1561,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -1742,26 +1569,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
r = tgsi_split_literal_constant(ctx, r600_src);
if (r)
return r;
- /* do it in 2 step as op3 doesn't support writemask */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_dp(struct r600_shader_ctx *ctx)
@@ -1784,9 +1617,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
- alu.dst.write = 1;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
@@ -1818,19 +1655,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
+ static float one_point_five = 1.5f;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
int r, i;
int opcode;
- boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
- uint32_t lit_vals[4];
+ boolean src_not_temp =
+ inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[0].Register.File != TGSI_FILE_INPUT;
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
@@ -1959,6 +1798,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = (u32*)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1979,6 +1819,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = (u32*)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1989,11 +1830,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
if (r)
return r;
- lit_vals[0] = fui(1.5f);
-
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
src_not_temp = FALSE;
src_gpr = ctx->temp_reg;
}
@@ -2066,6 +1902,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
@@ -2075,8 +1912,40 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
r = tgsi_split_literal_constant(ctx, r600_src);
if (r)
return r;
+
+ /* optimize if it's just an equal balance */
+ if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+ alu.src[0] = r600_src[1];
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+ alu.src[1] = r600_src[2];
+ alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ alu.omod = 3;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ alu.dst.chan = i;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
/* 1 - src0 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2086,7 +1955,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -2094,12 +1963,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
@@ -2108,7 +1977,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -2116,12 +1985,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* src0 * src1 + (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -2131,16 +2000,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_cmp(struct r600_shader_ctx *ctx)
@@ -2148,8 +2021,8 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int use_temp = 0;
int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -2158,10 +2031,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
if (r)
return r;
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
alu.src[0] = r600_src[0];
@@ -2173,24 +2046,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
alu.src[2] = r600_src[1];
alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3)
+ if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
return 0;
}
@@ -2257,10 +2125,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
for (i = 0; i < 4; i++) {
@@ -2318,10 +2182,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
if (use_temp)
return tgsi_helper_copy(ctx, inst);
@@ -2354,10 +2214,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2369,10 +2225,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = tmp - floor(tmp); */
@@ -2398,9 +2250,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = RoughApprox2ToX(tmp);*/
@@ -2421,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0;*/
@@ -2441,9 +2287,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
}
@@ -2473,10 +2316,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2489,10 +2328,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2515,10 +2350,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2534,10 +2365,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2553,10 +2380,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2572,10 +2395,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2597,10 +2416,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = log2(src);*/
@@ -2622,10 +2437,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0; */
@@ -2644,10 +2455,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
@@ -2802,8 +2609,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
static int pops(struct r600_shader_ctx *ctx, int pops)
{
- r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
- ctx->bc->cf_last->pop_count = pops;
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index cd108da4915..935dd6fe3ab 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -38,7 +38,6 @@ struct r600_shader_io {
struct r600_shader {
unsigned processor_type;
struct r600_bc bc;
- boolean flat_shade;
unsigned ninput;
unsigned noutput;
unsigned nlds;
@@ -46,9 +45,8 @@ struct r600_shader {
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
- struct r600_bc bc_fetch;
};
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 0573e63dc82..56ed35e8b32 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -74,6 +74,10 @@
#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30)
#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF
+#define V_SQ_CF_KCACHE_NOP 0x00000000
+#define V_SQ_CF_KCACHE_LOCK_1 0x00000001
+#define V_SQ_CF_KCACHE_LOCK_2 0x00000002
+#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003
#define P_SQ_CF_ALU_WORD1
#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0)
#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3)
@@ -187,6 +191,8 @@
#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
#define V_SQ_ALU_SRC_0_5 0x000000FC
#define V_SQ_ALU_SRC_LITERAL 0x000000FD
+#define V_SQ_ALU_SRC_PV 0x000000FE
+#define V_SQ_ALU_SRC_PS 0x000000FF
#define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index bf4ca057d28..96b02d72b94 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -36,7 +36,6 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <util/u_framebuffer.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
@@ -46,14 +45,164 @@
#include "r600_pipe.h"
#include "r600_state_inlines.h"
+void r600_polygon_offset_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state state;
+
+ state.id = R600_PIPE_STATE_POLYGON_OFFSET;
+ state.nregs = 0;
+ if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
+ float offset_units = rctx->rasterizer->offset_units;
+ unsigned offset_db_fmt_cntl = 0, depth;
+
+ switch (rctx->framebuffer.zsbuf->texture->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ depth = -24;
+ offset_units *= 2.0f;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ depth = -23;
+ offset_units *= 1.0f;
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ depth = -16;
+ offset_units *= 4.0f;
+ break;
+ default:
+ return;
+ }
+ /* FIXME some of those reg can be computed with cso */
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ r600_pipe_state_add_reg(&state,
+ R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &state);
+ }
+}
+
+/* FIXME optimize away spi update when it's not needed */
+static void r600_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
+ r600_begin_vertex_translate(rctx);
+ return;
+ }
+
+ if (rctx->any_user_vbs) {
+ r600_upload_user_buffers(rctx);
+ rctx->any_user_vbs = FALSE;
+ }
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+}
+
static void r600_draw_common(struct r600_drawl *draw)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_pipe_state *rstate;
struct r600_resource *rbuffer;
- unsigned i, j, offset, prim;
+ unsigned prim;
u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct pipe_vertex_buffer *vertex_buffer;
struct r600_draw rdraw;
struct r600_pipe_state vgt;
@@ -76,42 +225,23 @@ static void r600_draw_common(struct r600_drawl *draw)
}
if (r600_conv_pipe_prim(draw->mode, &prim))
return;
-
-
- /* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
return;
- if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
return;
-
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- uint32_t word2, format;
-
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
-
- format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
-
- word2 = format | S_038008_STRIDE(vertex_buffer->stride);
-
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
}
+ r600_spi_update(rctx);
+
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
mask |= (0xF << (i * 4));
@@ -126,46 +256,6 @@ static void r600_draw_common(struct r600_drawl *draw)
r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- /* build late state */
- if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
- float offset_units = rctx->rasterizer->offset_units;
- unsigned offset_db_fmt_cntl = 0, depth;
-
- switch (rctx->framebuffer.zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- return;
- }
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt,
- R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
- }
r600_context_pipe_state_set(&rctx->ctx, &vgt);
rdraw.vgt_num_indices = draw->count;
@@ -185,17 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_drawl draw;
- boolean translate = FALSE;
- if (rctx->vertex_elements->incompatible_layout) {
- r600_begin_vertex_translate(rctx);
- translate = TRUE;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
@@ -226,9 +306,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
r600_draw_common(&draw);
- if (translate)
- r600_end_vertex_translate(rctx);
-
pipe_resource_reference(&draw.index_buffer, NULL);
}
@@ -603,9 +680,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
- S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
+ S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- S_038014_LAST_LEVEL(state->last_level) |
+ S_038014_LAST_LEVEL(state->u.tex.last_level) |
S_038014_BASE_ARRAY(0) |
S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
@@ -824,10 +901,11 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct r600_surface *surf;
- unsigned level = state->cbufs[cb]->level;
+ unsigned level = state->cbufs[cb]->u.tex.level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
+ unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
@@ -838,6 +916,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ level, state->cbufs[cb]->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
@@ -857,7 +938,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
r600_pipe_state_add_reg(rstate,
R_028040_CB_COLOR0_BASE + cb * 4,
- (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate,
R_0280A0_CB_COLOR0_INFO + cb * 4,
color_info, 0xFFFFFFFF, bo[0]);
@@ -888,11 +969,12 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
struct r600_surface *surf;
unsigned level;
unsigned pitch, slice, format;
+ unsigned offset;
if (state->zsbuf == NULL)
return;
- level = state->zsbuf->level;
+ level = state->zsbuf->u.tex.level;
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
@@ -902,12 +984,15 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
rtex->depth = 1;
rbuffer = &rtex->resource;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
+ level, state->zsbuf->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
0xFFFFFFFF, NULL);
@@ -934,8 +1019,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
r600_cb(rctx, rstate, state, i);
@@ -1015,6 +1098,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+ if (state->zsbuf) {
+ r600_polygon_offset_update(rctx);
+ }
}
static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
@@ -1022,6 +1109,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+ uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
@@ -1030,6 +1118,8 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
return;
}
+ r600_upload_const_buffer(rctx, buffer, &offset);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
rctx->vs_const_buffer.nregs = 0;
@@ -1039,7 +1129,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
break;
case PIPE_SHADER_FRAGMENT:
@@ -1050,7 +1140,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
break;
default:
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 55bc5d0d22b..3603376f738 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -58,6 +58,12 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->states[rs->rstate.id] = &rs->rstate;
r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_polygon_offset_update(rctx);
+ } else {
+ r600_polygon_offset_update(rctx);
+ }
}
void r600_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -113,8 +119,23 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = (struct r600_vertex_element*)state;
+ /* delete previous translated vertex elements */
+ if (rctx->tran.new_velems) {
+ r600_end_vertex_translate(rctx);
+ }
+
rctx->vertex_elements = v;
if (v) {
+ rctx->states[v->rstate.id] = &v->rstate;
+ r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
+ }
+
+ if (v) {
// rctx->vs_rebuild = TRUE;
}
}
@@ -122,11 +143,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_vertex_element *v = (struct r600_vertex_element*)state;
- FREE(state);
-
+ if (rctx->states[v->rstate.id] == &v->rstate) {
+ rctx->states[v->rstate.id] = NULL;
+ }
if (rctx->vertex_elements == state)
rctx->vertex_elements = NULL;
+
+ r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ FREE(state);
}
@@ -153,8 +179,16 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
struct pipe_vertex_buffer *vbo;
unsigned max_index = (unsigned)-1;
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ if (rctx->family >= CHIP_CEDAR) {
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
+ } else {
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
@@ -162,20 +196,29 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
vbo = (struct pipe_vertex_buffer*)&buffers[i];
rctx->vertex_buffer[i].buffer = NULL;
+ if (buffers[i].buffer == NULL)
+ continue;
if (r600_buffer_is_user_buffer(buffers[i].buffer))
rctx->any_user_vbs = TRUE;
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
+ /* The stride of zero means we will be fetching only the first
+ * vertex, so don't care about max_index. */
+ if (!vbo->stride)
+ continue;
+
if (vbo->max_index == ~0) {
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
}
max_index = MIN2(vbo->max_index, max_index);
}
rctx->nvertex_buffer = count;
rctx->vb_max_index = max_index;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
}
@@ -186,9 +229,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- int i;
enum pipe_format *format;
+ int i;
assert(count < 32);
if (!v)
@@ -210,12 +254,16 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
}
v->incompatible_layout =
v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i] ||
- v->elements[i].src_offset % 4 != 0;
+ v->elements[i].src_format != v->hw_format[i];
v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
}
+ if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
+ FREE(v);
+ return NULL;
+ }
+
return v;
}
@@ -238,6 +286,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
/* TODO delete old shader */
rctx->ps_shader = (struct r600_pipe_shader *)state;
+ if (state) {
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate);
+ }
}
void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
@@ -246,6 +297,9 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
/* TODO delete old shader */
rctx->vs_shader = (struct r600_pipe_shader *)state;
+ if (state) {
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate);
+ }
}
void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 781612af570..d994196e19d 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -305,6 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_SWAP_STD;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 8ecd434a43a..e2745624575 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -45,14 +45,10 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
{
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *texture = transfer->resource;
- struct pipe_subresource subdst;
- subdst.face = 0;
- subdst.level = 0;
ctx->resource_copy_region(ctx, rtransfer->staging_texture,
- subdst, 0, 0, 0, texture, transfer->sr,
- transfer->box.x, transfer->box.y, transfer->box.z,
- transfer->box.width, transfer->box.height);
+ 0, 0, 0, 0, texture, transfer->level,
+ &transfer->box);
}
@@ -61,34 +57,32 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
{
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *texture = transfer->resource;
- struct pipe_subresource subsrc;
-
- subsrc.face = 0;
- subsrc.level = 0;
- ctx->resource_copy_region(ctx, texture, transfer->sr,
+ struct pipe_box sbox;
+
+ sbox.x = sbox.y = sbox.z = 0;
+ sbox.width = transfer->box.width;
+ sbox.height = transfer->box.height;
+ /* XXX that might be wrong */
+ sbox.depth = 1;
+ ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
- rtransfer->staging_texture, subsrc,
- 0, 0, 0,
- transfer->box.width, transfer->box.height);
+ rtransfer->staging_texture,
+ 0, &sbox);
ctx->flush(ctx, 0, NULL);
}
-static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
- unsigned level, unsigned zslice,
- unsigned face)
+unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
+ unsigned level, unsigned layer)
{
unsigned offset = rtex->offset[level];
switch (rtex->resource.base.b.target) {
case PIPE_TEXTURE_3D:
- assert(face == 0);
- return offset + zslice * rtex->layer_size[level];
case PIPE_TEXTURE_CUBE:
- assert(zslice == 0);
- return offset + face * rtex->layer_size[level];
+ return offset + layer * rtex->layer_size[level];
default:
- assert(zslice == 0 && face == 0);
+ assert(layer == 0);
return offset;
}
}
@@ -175,10 +169,7 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level)
{
- struct r600_screen* rscreen = (struct r600_screen *)screen;
struct pipe_resource *ptex = &rtex->resource.base.b;
- struct radeon *radeon = (struct radeon *)screen->winsys;
- enum chip_class chipc = r600_get_family_class(radeon);
unsigned width, stride, tile_width;
if (rtex->pitch_override)
@@ -212,11 +203,10 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
}
/* Get a width in pixels from a stride in bytes. */
-static unsigned pitch_to_width(enum pipe_format format,
- unsigned pitch_in_bytes)
+static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes)
{
- return (pitch_in_bytes / util_format_get_blocksize(format)) *
- util_format_get_blockwidth(format);
+ return (pitch_in_bytes / util_format_get_blocksize(format)) *
+ util_format_get_blockwidth(format);
}
static void r600_texture_set_array_mode(struct pipe_screen *screen,
@@ -335,12 +325,12 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
unsigned array_mode = 0;
- static int force_tiling = -1;
+ static int force_tiling = -1;
- /* Would like some magic "get_bool_option_once" routine.
+ /* Would like some magic "get_bool_option_once" routine.
*/
if (force_tiling == -1)
- force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
+ force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
if (force_tiling) {
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
@@ -371,8 +361,8 @@ static void r600_texture_destroy(struct pipe_screen *screen,
}
static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *ptex,
- struct winsys_handle *whandle)
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
@@ -382,36 +372,39 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
rtex->pitch_in_bytes[0], whandle);
}
-static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen,
+static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *texture,
- unsigned face, unsigned level,
- unsigned zslice, unsigned flags)
+ const struct pipe_surface *surf_tmpl)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
- unsigned offset, tile_height;
+ unsigned tile_height;
+ unsigned level = surf_tmpl->u.tex.level;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
if (surface == NULL)
return NULL;
- offset = r600_texture_get_offset(rtex, level, zslice, face);
+ /* XXX no offset */
+/* offset = r600_texture_get_offset(rtex, level, surf_tmpl->u.tex.first_layer);*/
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
- surface->base.format = texture->format;
+ surface->base.context = pipe;
+ surface->base.format = surf_tmpl->format;
surface->base.width = mip_minify(texture->width0, level);
surface->base.height = mip_minify(texture->height0, level);
- surface->base.offset = offset;
- surface->base.usage = flags;
- surface->base.zslice = zslice;
+ surface->base.usage = surf_tmpl->usage;
surface->base.texture = texture;
- surface->base.face = face;
- surface->base.level = level;
+ surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ surface->base.u.tex.level = level;
- tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]);
+ tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]);
surface->aligned_height = align(surface->base.height, tile_height);
return &surface->base;
}
-static void r600_tex_surface_destroy(struct pipe_surface *surface)
+static void r600_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surface)
{
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
@@ -444,14 +437,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
static unsigned int r600_texture_is_referenced(struct pipe_context *context,
struct pipe_resource *texture,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
/* FIXME */
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
}
-int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
-
int r600_texture_depth_flush(struct pipe_context *ctx,
struct pipe_resource *texture)
{
@@ -483,7 +474,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
out:
/* XXX: only do this if the depth texture has actually changed:
*/
- r600_blit_uncompress_depth_ptr(ctx, rtex);
+ r600_blit_uncompress_depth(ctx, rtex);
return 0;
}
@@ -491,7 +482,7 @@ out:
*/
static INLINE unsigned u_box_volume( const struct pipe_box *box )
{
- return box->width * box->depth * box->height;
+ return box->width * box->depth * box->height;
};
@@ -499,44 +490,44 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box )
* If so, don't use a staging resource.
*/
static boolean permit_hardware_blit(struct pipe_screen *screen,
- struct pipe_resource *res)
+ struct pipe_resource *res)
{
- unsigned bind;
+ unsigned bind;
- if (util_format_is_depth_or_stencil(res->format))
- bind = PIPE_BIND_DEPTH_STENCIL;
- else
- bind = PIPE_BIND_RENDER_TARGET;
+ if (util_format_is_depth_or_stencil(res->format))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
/* See r600_resource_copy_region: there is something wrong
- * with depth resource copies at the moment so avoid them for
- * now.
- */
+ * with depth resource copies at the moment so avoid them for
+ * now.
+ */
if (util_format_get_component_bits(res->format,
- UTIL_FORMAT_COLORSPACE_ZS,
- 0) != 0)
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- bind, 0))
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0))
- return FALSE;
-
- return TRUE;
+ UTIL_FORMAT_COLORSPACE_ZS,
+ 0) != 0)
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ bind, 0))
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW, 0))
+ return FALSE;
+
+ return TRUE;
}
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
@@ -556,37 +547,36 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
if (rtex->tiled)
use_staging_texture = TRUE;
- if ((usage & PIPE_TRANSFER_READ) &&
- u_box_volume(box) > 1024)
- use_staging_texture = TRUE;
-
- /* XXX: Use a staging texture for uploads if the underlying BO
- * is busy. No interface for checking that currently? so do
- * it eagerly whenever the transfer doesn't require a readback
- * and might block.
- */
- if ((usage & PIPE_TRANSFER_WRITE) &&
- !(usage & (PIPE_TRANSFER_READ |
- PIPE_TRANSFER_DONTBLOCK |
- PIPE_TRANSFER_UNSYNCHRONIZED)))
- use_staging_texture = TRUE;
-
- if (!permit_hardware_blit(ctx->screen, texture) ||
- (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
- use_staging_texture = FALSE;
+ if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
+ use_staging_texture = TRUE;
+
+ /* XXX: Use a staging texture for uploads if the underlying BO
+ * is busy. No interface for checking that currently? so do
+ * it eagerly whenever the transfer doesn't require a readback
+ * and might block.
+ */
+ if ((usage & PIPE_TRANSFER_WRITE) &&
+ !(usage & (PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_DONTBLOCK |
+ PIPE_TRANSFER_UNSYNCHRONIZED)))
+ use_staging_texture = TRUE;
+
+ if (!permit_hardware_blit(ctx->screen, texture) ||
+ (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
+ use_staging_texture = FALSE;
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
return NULL;
pipe_resource_reference(&trans->transfer.resource, texture);
- trans->transfer.sr = sr;
+ trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
if (rtex->depth) {
- /* XXX: only readback the rectangle which is being mapped?
- */
- /* XXX: when discard is true, no need to read back from depth texture
- */
+ /* XXX: only readback the rectangle which is being mapped?
+ */
+ /* XXX: when discard is true, no need to read back from depth texture
+ */
r = r600_texture_depth_flush(ctx, texture);
if (r < 0) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
@@ -600,6 +590,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
resource.width0 = box->width;
resource.height0 = box->height;
resource.depth0 = 1;
+ resource.array_size = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_STAGING;
@@ -625,7 +616,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
}
trans->transfer.stride =
- ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0];
+ ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
/* Always referenced in the blit. */
@@ -633,8 +624,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
}
return &trans->transfer;
}
- trans->transfer.stride = rtex->pitch_in_bytes[sr.level];
- trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
+ trans->transfer.stride = rtex->pitch_in_bytes[level];
+ trans->offset = r600_texture_get_offset(rtex, level, box->z);
return &trans->transfer;
}
@@ -747,10 +738,10 @@ struct u_resource_vtbl r600_texture_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-void r600_init_screen_texture_functions(struct pipe_screen *screen)
+void r600_init_surface_functions(struct r600_pipe_context *r600)
{
- screen->get_tex_surface = r600_get_tex_surface;
- screen->tex_surface_destroy = r600_tex_surface_destroy;
+ r600->context.create_surface = r600_create_surface;
+ r600->context.surface_destroy = r600_surface_destroy;
}
static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
@@ -851,8 +842,8 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case UTIL_FORMAT_COLORSPACE_YUV:
yuv_format |= (1 << 30);
switch (format) {
- case PIPE_FORMAT_UYVY:
- case PIPE_FORMAT_YUYV:
+ case PIPE_FORMAT_UYVY:
+ case PIPE_FORMAT_YUYV:
default:
break;
}
@@ -870,29 +861,29 @@ uint32_t r600_translate_texformat(enum pipe_format format,
/* S3TC formats. TODO */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- static int r600_enable_s3tc = -1;
+ static int r600_enable_s3tc = -1;
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc =
- debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ if (r600_enable_s3tc == -1)
+ r600_enable_s3tc =
+ debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
- if (!r600_enable_s3tc)
- goto out_unknown;
+ if (!r600_enable_s3tc)
+ goto out_unknown;
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
- result = FMT_BC1;
- goto out_word4;
+ result = FMT_BC1;
+ goto out_word4;
case PIPE_FORMAT_DXT3_RGBA:
- result = FMT_BC2;
- goto out_word4;
+ result = FMT_BC2;
+ goto out_word4;
case PIPE_FORMAT_DXT5_RGBA:
- result = FMT_BC3;
- goto out_word4;
- default:
- goto out_unknown;
- }
+ result = FMT_BC3;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
}
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 9a07cf2073f..f80fa7af941 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -41,6 +41,8 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
struct pipe_resource *out_buffer;
unsigned i, num_verts;
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+ void *tmp;
/* Initialize the translate key, i.e. the recipe how vertices should be
* translated. */
@@ -51,9 +53,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
unsigned output_format_size = ve->hw_format_size[i];
/* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i] &&
- (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 &&
- vb->stride % 4 == 0) {
+ if (ve->elements[i].src_format == ve->hw_format[i]) {
continue;
}
@@ -125,12 +125,11 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
/* Unmap all buffers. */
for (i = 0; i < rctx->nvertex_buffer; i++) {
if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(pipe, vb_transfer[i]);
}
}
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+ pipe_buffer_unmap(pipe, out_transfer);
/* Setup the new vertex buffer in the first free slot. */
for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
@@ -147,29 +146,23 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
}
/* Save and replace vertex elements. */
- {
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- rctx->tran.saved_velems = rctx->vertex_elements;
-
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->elements[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->elements[i],
+ sizeof(struct pipe_vertex_element));
}
-
- rctx->tran.new_velems =
- pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
}
+ tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, tmp);
+ rctx->tran.new_velems = tmp;
+
pipe_resource_reference(&out_buffer, NULL);
}
@@ -177,13 +170,15 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx)
{
struct pipe_context *pipe = &rctx->context;
+ if (rctx->tran.new_velems == NULL) {
+ return;
+ }
/* Restore vertex elements. */
- pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems);
pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
+ rctx->tran.new_velems = NULL;
/* Delete the now-unused VBO. */
- pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,
- NULL);
+ pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL);
}
void r600_translate_index_buffer(struct r600_pipe_context *r600,
@@ -197,14 +192,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600,
*index_size = 2;
*start = 0;
break;
-
case 2:
- if (*start % 2 != 0) {
- util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
- *start = 0;
- }
- break;
-
case 4:
break;
}
diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c
new file mode 100644
index 00000000000..44102ff55b6
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_upload.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse <jglisse@redhat.com>
+ */
+#include <errno.h>
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "r600.h"
+#include "r600_pipe.h"
+#include "r600_resource.h"
+
+struct r600_upload {
+ struct r600_pipe_context *rctx;
+ struct r600_bo *buffer;
+ char *ptr;
+ unsigned size;
+ unsigned default_size;
+ unsigned total_alloc_size;
+ unsigned offset;
+ unsigned alignment;
+};
+
+struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
+ unsigned default_size,
+ unsigned alignment)
+{
+ struct r600_upload *upload = CALLOC_STRUCT(r600_upload);
+
+ if (upload == NULL)
+ return NULL;
+
+ upload->rctx = rctx;
+ upload->size = 0;
+ upload->default_size = default_size;
+ upload->alignment = alignment;
+ upload->ptr = NULL;
+ upload->buffer = NULL;
+ upload->total_alloc_size = 0;
+
+ return upload;
+}
+
+void r600_upload_flush(struct r600_upload *upload)
+{
+ if (upload->buffer) {
+ r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
+ }
+ upload->default_size = MAX2(upload->total_alloc_size, upload->default_size);
+ upload->total_alloc_size = 0;
+ upload->size = 0;
+ upload->offset = 0;
+ upload->ptr = NULL;
+ upload->buffer = NULL;
+}
+
+void r600_upload_destroy(struct r600_upload *upload)
+{
+ r600_upload_flush(upload);
+ FREE(upload);
+}
+
+int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
+ unsigned size, struct r600_resource_buffer *in_buffer,
+ unsigned *out_offset, unsigned *out_size,
+ struct r600_bo **out_buffer)
+{
+ unsigned alloc_size = align(size, upload->alignment);
+ const void *in_ptr = NULL;
+
+ if (upload->offset + alloc_size > upload->size) {
+ if (upload->size) {
+ r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
+ }
+ upload->size = align(MAX2(upload->default_size, alloc_size), 4096);
+ upload->total_alloc_size += upload->size;
+ upload->offset = 0;
+ upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (upload->buffer == NULL) {
+ return -ENOMEM;
+ }
+ upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL);
+ }
+
+ in_ptr = in_buffer->user_buffer;
+ memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size);
+ *out_offset = upload->offset;
+ *out_size = upload->size;
+ *out_buffer = NULL;
+ r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer);
+ upload->offset += alloc_size;
+
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 892dee86baf..a7f2f54736e 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -29,8 +29,6 @@
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -61,18 +59,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}