diff options
Diffstat (limited to 'src/gallium/drivers/r300')
31 files changed, 3550 insertions, 913 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index e83d943cd84..0e4e1155325 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -7,15 +7,17 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ + r300_debug.c \ r300_emit.c \ r300_flush.c \ + r300_query.c \ r300_screen.c \ r300_state.c \ + r300_state_derived.c \ + r300_state_invariant.c \ r300_state_shader.c \ r300_surface.c \ r300_swtcl_emit.c \ r300_texture.c include ../../Makefile.template - -symlinks: diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 7def62422a0..e01a0546b22 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -21,7 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_chipset.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" /* r300_chipset: A file all to itself for deducing the various properties of * Radeons. */ @@ -30,7 +30,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) { /* Reasonable defaults */ - caps->has_tcl = TRUE; + caps->has_tcl = getenv("RADEON_NO_TCL") ? TRUE : FALSE; caps->is_r500 = FALSE; caps->num_vert_fpus = 4; @@ -204,6 +204,13 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->has_tcl = FALSE; break; + case 0x793F: + case 0x7941: + case 0x7942: + caps->family = CHIP_FAMILY_RS600; + caps->has_tcl = FALSE; + break; + case 0x796C: case 0x796D: case 0x796E: @@ -343,6 +350,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) break; } - /* Force off TCL for now */ - caps->has_tcl = FALSE; + /* XXX SW TCL is broken so no forcing it off right now + caps->has_tcl = FALSE; */ } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index a9cd372ec55..21eebeae600 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -64,6 +64,7 @@ enum { CHIP_FAMILY_RC410, CHIP_FAMILY_RS480, CHIP_FAMILY_RS482, + CHIP_FAMILY_RS600, CHIP_FAMILY_RS690, CHIP_FAMILY_RS740, CHIP_FAMILY_RV515, diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7b605ae87ac..b8584702aad 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,18 +22,87 @@ #include "r300_context.h" +static boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (r300->dirty_state) { + r300_emit_dirty_state(r300); + } + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffers[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + if (indexBuffer) { + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); + } else { + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].user_count * + (sizeof(float) * 4)); + + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); + } + + return TRUE; +} + +static boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) +{ + return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); +} + +static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) +{ + return r300_draw_elements(pipe, NULL, 0, mode, start, count); +} + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); draw_destroy(r300->draw); FREE(r300->blend_color_state); + FREE(r300->rs_block); FREE(r300->scissor_state); + FREE(r300->viewport_state); FREE(r300); } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct pipe_winsys* winsys, struct r300_winsys* r300_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); @@ -41,26 +110,37 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; + /* XXX this could be refactored now? */ r300->winsys = r300_winsys; - r300->context.winsys = winsys; - r300->context.screen = r300_create_screen(winsys, r300_winsys); + + r300->context.winsys = (struct pipe_winsys*)r300_winsys; + r300->context.screen = r300_screen(screen); r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; + r300->context.draw_arrays = r300_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_draw_range_elements; + r300->draw = draw_create(); - /*XXX draw_set_rasterize_stage(r300->draw, r300_draw_swtcl_stage(r300));*/ + draw_set_rasterize_stage(r300->draw, r300_draw_swtcl_stage(r300)); r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); r300_init_flush_functions(r300); + r300_init_query_functions(r300); + r300_init_surface_functions(r300); r300_init_state_functions(r300); + r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 376c57639d8..0e5e471d116 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -24,11 +24,13 @@ #define R300_CONTEXT_H #include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "pipe/p_context.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" #include "r300_clear.h" +#include "r300_query.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -58,8 +60,12 @@ struct r300_dsa_state { }; struct r300_rs_state { + /* XXX icky as fucking hell */ + struct pipe_rasterizer_state rs; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ + uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ @@ -69,6 +75,14 @@ struct r300_rs_state { uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ +}; + +struct r300_rs_block { + uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ + uint32_t count; /* R300_RS_COUNT */ + uint32_t inst_count; /* R300_RS_INST_COUNT */ + uint32_t inst[8]; /* R300_RS_INST_[0-7] */ }; struct r300_sampler_state { @@ -83,23 +97,52 @@ struct r300_scissor_state { }; struct r300_texture_state { + uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ + uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ + uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ +}; + +struct r300_viewport_state { + float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ + float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ + float yscale; /* R300_VAP_VPORT_YSCALE: 0x20a0 */ + float yoffset; /* R300_VAP_VPORT_YOFFSET: 0x20a4 */ + float zscale; /* R300_VAP_VPORT_ZSCALE: 0x20a8 */ + float zoffset; /* R300_VAP_VPORT_ZOFFSET: 0x20ac */ + uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */ }; -#define R300_NEW_BLEND 0x000001 -#define R300_NEW_BLEND_COLOR 0x000002 -#define R300_NEW_DSA 0x000004 -#define R300_NEW_FRAMEBUFFERS 0x000008 -#define R300_NEW_FRAGMENT_SHADER 0x000010 -#define R300_NEW_RASTERIZER 0x000020 -#define R300_NEW_SAMPLER 0x000040 -#define R300_NEW_SCISSOR 0x004000 -#define R300_NEW_TEXTURE 0x008000 -#define R300_NEW_VERTEX_SHADER 0x800000 -#define R300_NEW_KITCHEN_SINK 0xffffff +#define R300_NEW_BLEND 0x0000001 +#define R300_NEW_BLEND_COLOR 0x0000002 +#define R300_NEW_CONSTANTS 0x0000004 +#define R300_NEW_DSA 0x0000008 +#define R300_NEW_FRAMEBUFFERS 0x0000010 +#define R300_NEW_FRAGMENT_SHADER 0x0000020 +#define R300_NEW_RASTERIZER 0x0000040 +#define R300_NEW_RS_BLOCK 0x0000080 +#define R300_NEW_SAMPLER 0x0000100 +#define R300_ANY_NEW_SAMPLERS 0x000ff00 +#define R300_NEW_SCISSOR 0x0010000 +#define R300_NEW_TEXTURE 0x0020000 +#define R300_ANY_NEW_TEXTURES 0x1fe0000 +#define R300_NEW_VERTEX_FORMAT 0x2000000 +#define R300_NEW_VERTEX_SHADER 0x4000000 +#define R300_NEW_VIEWPORT 0x8000000 +#define R300_NEW_KITCHEN_SINK 0xfffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ +struct r300_constant_buffer { + /* Buffer of constants */ + /* XXX first number should be raised */ + float constants[8][4]; + /* Number of user-defined constants */ + int user_count; + /* Total number of constants */ + int count; +}; + struct r3xx_fragment_shader { /* Parent class */ struct pipe_shader_state state; @@ -107,16 +150,55 @@ struct r3xx_fragment_shader { /* Has this shader been translated yet? */ boolean translated; + + /* Pixel stack size */ + int stack_size; }; struct r300_fragment_shader { /* Parent class */ struct r3xx_fragment_shader shader; + + /* Number of ALU instructions */ + int alu_instruction_count; + + /* Number of texture instructions */ + int tex_instruction_count; + + /* Number of texture indirections */ + int indirections; + + /* Indirection node offsets */ + int offset0; + int offset1; + int offset2; + int offset3; + + /* Machine instructions */ + struct { + uint32_t alu_rgb_inst; + uint32_t alu_rgb_addr; + uint32_t alu_alpha_inst; + uint32_t alu_alpha_addr; + } instructions[64]; /* XXX magic num */ }; struct r500_fragment_shader { /* Parent class */ struct r3xx_fragment_shader shader; + + /* Number of used instructions */ + int instruction_count; + + /* Machine instructions */ + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } instructions[256]; /*< XXX magic number */ }; struct r300_texture { @@ -126,11 +208,30 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + /* Stride (pitch?) of this texture in bytes */ + unsigned stride; + /* Total size of this texture, in bytes. */ unsigned size; /* Pipe buffer backing this texture. */ struct pipe_buffer* buffer; + + /* Registers carrying texture format data. */ + struct r300_texture_state state; +}; + +struct r300_vertex_format { + /* Parent class */ + struct vertex_info vinfo; + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ + uint32_t vap_prog_stream_cntl[8]; + /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ + uint32_t vap_prog_stream_cntl_ext[8]; + /* This is a map of VAP/SW TCL outputs into the GA/RS. + * tab[i] is the location of input i in GA/RS input memory. + * Named tab for historical reasons. */ + int tab[16]; }; struct r300_context { @@ -147,6 +248,8 @@ struct r300_context { struct r300_blend_state* blend_state; /* Blend color state. */ struct r300_blend_color_state* blend_color_state; + /* Shader constants. */ + struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ struct r300_dsa_state* dsa_state; /* Fragment shader. */ @@ -155,6 +258,8 @@ struct r300_context { struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ struct r300_rs_state* rs_state; + /* RS block state. */ + struct r300_rs_block* rs_block; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; @@ -162,8 +267,14 @@ struct r300_context { struct r300_scissor_state* scissor_state; /* Texture states. */ struct r300_texture* textures[8]; - struct r300_texture_state* texture_states[8]; int texture_count; + /* Vertex buffers. */ + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + int vertex_buffer_count; + /* Vertex information. */ + struct r300_vertex_format vertex_info; + /* Viewport state. */ + struct r300_viewport_state* viewport_state; /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ @@ -176,6 +287,7 @@ static struct r300_context* r300_context(struct pipe_context* context) { } /* Context initialization. */ +struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); @@ -183,7 +295,6 @@ void r300_init_surface_functions(struct r300_context* r300); * We'll just step out in that case... */ #ifndef R300_WINSYS_H struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct pipe_winsys* winsys, struct r300_winsys* r300_winsys); #endif diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 385b61a096c..d8038ff1e19 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -23,27 +23,19 @@ #ifndef R300_CS_H #define R300_CS_H +#include "util/u_math.h" + #include "r300_reg.h" #include "r300_winsys.h" -/* Pack a 32-bit float into a dword. */ -static uint32_t pack_float_32(float f) -{ - union { - float f; - uint32_t u; - } u; - - u.f = f; - return u.u; -} - /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ #define MAX_CS_SIZE 64 * 1024 / 4 +#define VERY_VERBOSE_REGISTERS 0 + /* XXX stolen from radeon_drm.h */ #define RADEON_GEM_DOMAIN_CPU 0x1 #define RADEON_GEM_DOMAIN_GTT 0x2 @@ -80,13 +72,14 @@ static uint32_t pack_float_32(float f) } while (0) #define OUT_CS_32F(value) do { \ - cs_winsys->write_cs_dword(cs, pack_float_32(value)); \ + cs_winsys->write_cs_dword(cs, fui(value)); \ cs_count--; \ } while (0) #define OUT_CS_REG(register, value) do { \ - debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ - value, register); \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ + value, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, 0)); \ OUT_CS(value); \ @@ -95,8 +88,9 @@ static uint32_t pack_float_32(float f) /* Note: This expects count to be the number of registers, * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ - debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ - count, register); \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ + count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1))); \ } while (0) @@ -119,7 +113,7 @@ static uint32_t pack_float_32(float f) } while (0) #define FLUSH_CS do { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \ + debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, __FILE__, \ __LINE__); \ cs_winsys->flush_cs(cs); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h index 71e66236991..03bb608eb9a 100644 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ b/src/gallium/drivers/r300/r300_cs_inlines.h @@ -26,12 +26,25 @@ #ifdef R300_CS_H +#define RADEON_ONE_REG_WR (1 << 15) + +#define OUT_CS_ONE_REG(register, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + count, register); \ + assert(register); \ + OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ +} while (0) + #define R300_PACIFY do { \ + OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ + (1 << 18)); \ +} while (0) + +#define R300_SCREENDOOR do { \ OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ - OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | \ - (1 << 18) | (1 << 31)); \ + R300_PACIFY; \ OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ } while (0) - #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c new file mode 100644 index 00000000000..f657588c720 --- /dev/null +++ b/src/gallium/drivers/r300/r300_debug.c @@ -0,0 +1,218 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_debug.h" + +static char* r500_fs_swiz[] = { + " R", + " G", + " B", + " A", + " 0", + ".5", + " 1", + " U", +}; + +static char* r500_fs_op_rgb[] = { + "MAD", + "DP3", + "DP4", + "D2A", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "SOP", + "MDH", + "MDV", +}; + +static char* r500_fs_op_alpha[] = { + "MAD", + " DP", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "EX2", + "LN2", + "RCP", + "RSQ", + "SIN", + "COS", + "MDH", + "MDV", +}; + +static char* r500_fs_mask[] = { + "NONE", + "R ", + " G ", + "RG ", + " B ", + "R B ", + " GB ", + "RGB ", + " A", + "R A", + " G A", + "RG A", + " BA", + "R BA", + " GBA", + "RGBA", +}; + +static char* r500_fs_tex[] = { + " NOP", + " LD", + "TEXKILL", + " PROJ", + "LODBIAS", + " LOD", + " DXDY", +}; + +void r500_fs_dump(struct r500_fragment_shader* fs) +{ + int i; + uint32_t inst; + + for (i = 0; i < fs->instruction_count; i++) { + inst = fs->instructions[i].inst0; + debug_printf("%d: 0: CMN_INST 0x%08x:", i, inst); + switch (inst & 0x3) { + case R500_INST_TYPE_ALU: + debug_printf("ALU "); + break; + case R500_INST_TYPE_OUT: + debug_printf("OUT "); + break; + case R500_INST_TYPE_FC: + debug_printf("FC "); + break; + case R500_INST_TYPE_TEX: + debug_printf("TEX "); + break; + } + debug_printf("%s %s %s %s ", + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU_WAIT" : ""); + debug_printf("wmask: %s omask: %s\n", + r500_fs_mask[(inst >> 11) & 0xf], + r500_fs_mask[(inst >> 15) & 0xf]); + switch (inst & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + inst = fs->instructions[i].inst1; + debug_printf(" 1: RGB_ADDR 0x%08x:", inst); + debug_printf("Addr0: %d%c, Addr1: %d%c, " + "Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', + (inst >> 30)); + + inst = fs->instructions[i].inst2; + debug_printf(" 2: ALPHA_ADDR 0x%08x:", inst); + debug_printf("Addr0: %d%c, Addr1: %d%c, " + "Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', + (inst >> 30)); + + inst = fs->instructions[i].inst3; + debug_printf(" 3: RGB_INST 0x%08x:", inst); + debug_printf("rgb_A_src:%d %s/%s/%s %d " + "rgb_B_src:%d %s/%s/%s %d\n", + inst & 0x3, r500_fs_swiz[(inst >> 2) & 0x7], + r500_fs_swiz[(inst >> 5) & 0x7], + r500_fs_swiz[(inst >> 8) & 0x7], + (inst >> 11) & 0x3, (inst >> 13) & 0x3, + r500_fs_swiz[(inst >> 15) & 0x7], + r500_fs_swiz[(inst >> 18) & 0x7], + r500_fs_swiz[(inst >> 21) & 0x7], + (inst >> 24) & 0x3); + + inst = fs->instructions[i].inst4; + debug_printf(" 4: ALPHA_INST 0x%08x:", inst); + debug_printf("%s dest:%d%s alp_A_src:%d %s %d " + "alp_B_src:%d %s %d w:%d\n", + r500_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f, + inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, + r500_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3, + (inst >> 19) & 0x3, r500_fs_swiz[(inst >> 21) & 0x7], + (inst >> 24) & 0x3, (inst >> 31) & 0x1); + + inst = fs->instructions[i].inst5; + debug_printf(" 5: RGBA_INST 0x%08x:", inst); + debug_printf("%s dest:%d%s rgb_C_src:%d %s/%s/%s %d " + "alp_C_src:%d %s %d\n", + r500_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f, + inst & (1 << 11) ? "(rel)":"", (inst >> 12) & 0x3, + r500_fs_swiz[(inst >> 14) & 0x7], + r500_fs_swiz[(inst >> 17) & 0x7], + r500_fs_swiz[(inst >> 20) & 0x7], + (inst >> 23) & 0x3, (inst >> 25) & 0x3, + r500_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + /* XXX don't even bother yet */ + break; + case R500_INST_TYPE_TEX: + inst = fs->instructions[i].inst1; + debug_printf(" 1: TEX_INST 0x%08x: id: %d " + "op:%s, %s, %s %s\n", + inst, (inst >> 16) & 0xf, + r500_fs_tex[(inst >> 22) & 0x7], + (inst & (1 << 25)) ? "ACQ" : "", + (inst & (1 << 26)) ? "IGNUNC" : "", + (inst & (1 << 27)) ? "UNSCALED" : "SCALED"); + + inst = fs->instructions[i].inst2; + debug_printf(" 2: TEX_ADDR 0x%08x: " + "src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", + inst, inst & 0x7f, inst & (1 << 7) ? "(rel)" : "", + r500_fs_swiz[(inst >> 8) & 0x3], + r500_fs_swiz[(inst >> 10) & 0x3], + r500_fs_swiz[(inst >> 12) & 0x3], + r500_fs_swiz[(inst >> 14) & 0x3], + (inst >> 16) & 0x7f, inst & (1 << 23) ? "(rel)" : "", + r500_fs_swiz[(inst >> 24) & 0x3], + r500_fs_swiz[(inst >> 26) & 0x3], + r500_fs_swiz[(inst >> 28) & 0x3], + r500_fs_swiz[(inst >> 30) & 0x3]); + + inst = fs->instructions[i].inst3; + debug_printf(" 3: TEX_DXDY 0x%08x\n", inst); + break; + } + } +} diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h new file mode 100644 index 00000000000..de5d701ed9c --- /dev/null +++ b/src/gallium/drivers/r300/r300_debug.h @@ -0,0 +1,31 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_DEBUG_H +#define R300_DEBUG_H + +#include "r300_reg.h" +#include "r300_state_shader.h" + +void r500_fs_dump(struct r500_fragment_shader* fs); + +#endif /* R300_DEBUG_H */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 585a9e729d7..a2e771bd1b2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -40,9 +40,9 @@ void r300_emit_blend_state(struct r300_context* r300, void r300_emit_blend_color_state(struct r300_context* r300, struct r300_blend_color_state* bc) { - struct r300_screen* r300screen = - (struct r300_screen*)r300->context.screen; + struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); + if (r300screen->caps->is_r500) { BEGIN_CS(3); OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); @@ -59,9 +59,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa) { - struct r300_screen* r300screen = - (struct r300_screen*)r300->context.screen; + struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* XXX figure out the r300 counterpart for this */ @@ -79,7 +79,95 @@ void r300_emit_dsa_state(struct r300_context* r300, END_CS; } -/* XXX add pitch, stride, z/stencil buf */ +void r300_emit_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + int i; + + BEGIN_CS(22); + + OUT_CS_REG(R300_US_CONFIG, MAX2(fs->indirections - 1, 0)); + OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size); + /* XXX figure out exactly how big the sizes are on this reg */ + OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); + /* XXX figure these ones out a bit better kthnx */ + OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_3, R300_RGBA_OUT); + + for (i = 0; i < fs->alu_instruction_count; i++) { + OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i), + fs->instructions[i].alu_rgb_inst); + OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i), + fs->instructions[i].alu_rgb_addr); + OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i), + fs->instructions[i].alu_alpha_inst); + OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i), + fs->instructions[i].alu_alpha_addr); + } + + END_CS; +} + +void r500_emit_fragment_shader(struct r300_context* r300, + struct r500_fragment_shader* fs) +{ + CS_LOCALS(r300); + struct r300_constant_buffer* constants = + &r300->shader_constants[PIPE_SHADER_FRAGMENT]; + int i; + + BEGIN_CS(9 + (fs->instruction_count * 6) + (constants->count ? 3 : 0) + + (constants->count * 4)); + OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size); + OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(fs->instruction_count)); + + OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, fs->instruction_count * 6); + for (i = 0; i < fs->instruction_count; i++) { + OUT_CS(fs->instructions[i].inst0); + OUT_CS(fs->instructions[i].inst1); + OUT_CS(fs->instructions[i].inst2); + OUT_CS(fs->instructions[i].inst3); + OUT_CS(fs->instructions[i].inst4); + OUT_CS(fs->instructions[i].inst5); + } + + if (constants->count) { + OUT_CS_REG(R500_GA_US_VECTOR_INDEX, + R500_GA_US_VECTOR_INDEX_TYPE_CONST); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->count * 4); + for (i = 0; i < constants->count; i++) { + OUT_CS_32F(constants->constants[i][0]); + OUT_CS_32F(constants->constants[i][1]); + OUT_CS_32F(constants->constants[i][2]); + OUT_CS_32F(constants->constants[i][3]); + } + } + + END_CS; +} + +/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from + * C3 to C0. */ +uint32_t translate_out_fmt(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_B | R300_C1_SEL_G | + R300_C2_SEL_R | R300_C3_SEL_A; + default: + return R300_US_OUT_FMT_UNUSED; + } + return 0; +} + +/* XXX add pitch, stride, clean up */ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) { @@ -87,23 +175,47 @@ void r300_emit_fb_state(struct r300_context* r300, struct r300_texture* tex; int i; - BEGIN_CS((3 * fb->nr_cbufs) + 6); + BEGIN_CS((6 * fb->nr_cbufs) + (fb->zsbuf ? 6 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), + translate_out_fmt(fb->cbufs[i]->format)); + } + + if (fb->zsbuf) { + tex = (struct r300_texture*)fb->zsbuf->texture; + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + if (fb->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) { + OUT_CS_REG(R300_ZB_FORMAT, + R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL); + } else { + OUT_CS_REG(R300_ZB_FORMAT, 0x0); + } } - R300_PACIFY; + + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); END_CS; } void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) { - struct r300_screen* r300screen = - (struct r300_screen*)r300->context.screen; CS_LOCALS(r300); - BEGIN_CS(14); + + BEGIN_CS(20); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); + OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); + OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); + OUT_CS(rs->point_minmax); + OUT_CS(rs->line_control); OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); OUT_CS(rs->depth_scale_front); OUT_CS(rs->depth_offset_front); @@ -113,43 +225,241 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) OUT_CS(rs->cull_mode); OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); + OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control); + END_CS; +} + +void r300_emit_rs_block_state(struct r300_context* r300, + struct r300_rs_block* rs) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + CS_LOCALS(r300); + int i; + + BEGIN_CS(21); + if (r300screen->caps->is_r500) { + OUT_CS_REG_SEQ(R500_RS_IP_0, 8); + } else { + OUT_CS_REG_SEQ(R300_RS_IP_0, 8); + } + for (i = 0; i < 8; i++) { + OUT_CS(rs->ip[i]); + debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); + } + + OUT_CS_REG_SEQ(R300_RS_COUNT, 2); + OUT_CS(rs->count); + OUT_CS(rs->inst_count); + + if (r300screen->caps->is_r500) { + OUT_CS_REG_SEQ(R500_RS_INST_0, 8); + } else { + OUT_CS_REG_SEQ(R300_RS_INST_0, 8); + } + for (i = 0; i < 8; i++) { + OUT_CS(rs->inst[i]); + debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); + } + + debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, + rs->inst_count); + END_CS; } -static void r300_emit_dirty_state(struct r300_context* r300) +void r300_emit_sampler(struct r300_context* r300, + struct r300_sampler_state* sampler, unsigned offset) { - struct r300_screen* r300screen = - (struct r300_screen*)r300->context.screen; CS_LOCALS(r300); + BEGIN_CS(6); + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0); + OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); + OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); + END_CS; +} + +void r300_emit_scissor_state(struct r300_context* r300, + struct r300_scissor_state* scissor) +{ + CS_LOCALS(r300); + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(scissor->scissor_top_left); + OUT_CS(scissor->scissor_bottom_right); + END_CS; +} + +void r300_emit_texture(struct r300_context* r300, + struct r300_texture* tex, unsigned offset) +{ + CS_LOCALS(r300); + + BEGIN_CS(10); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); + OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); + OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); + OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); + OUT_CS_RELOC(tex->buffer, 0, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_CS; +} + +void r300_emit_vertex_format_state(struct r300_context* r300) +{ + CS_LOCALS(r300); + int i; + + BEGIN_CS(26); + OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info.vinfo.size); + + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(r300->vertex_info.vinfo.hwfmt[0]); + OUT_CS(r300->vertex_info.vinfo.hwfmt[1]); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(r300->vertex_info.vinfo.hwfmt[2]); + OUT_CS(r300->vertex_info.vinfo.hwfmt[3]); + for (i = 0; i < 4; i++) { + debug_printf("hwfmt%d: 0x%08x\n", i, + r300->vertex_info.vinfo.hwfmt[i]); + } + + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); + for (i = 0; i < 8; i++) { + OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]); + debug_printf("prog_stream_cntl%d: 0x%08x\n", i, + r300->vertex_info.vap_prog_stream_cntl[i]); + } + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); + for (i = 0; i < 8; i++) { + OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]); + debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, + r300->vertex_info.vap_prog_stream_cntl_ext[i]); + } + END_CS; +} + +void r300_emit_viewport_state(struct r300_context* r300, + struct r300_viewport_state* viewport) +{ + return; + CS_LOCALS(r300); + + BEGIN_CS(7); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 7); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS(viewport->vte_control); + END_CS; +} + +static void r300_flush_textures(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + OUT_CS_REG(R300_TX_INVALTAGS, 0); + OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); + END_CS; +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + int i; + int dirty_tex = 0; + if (!(r300->dirty_state) && !(r300->dirty_hw)) { return; } + r300_update_derived_state(r300); + /* XXX check size */ if (r300->dirty_state & R300_NEW_BLEND) { r300_emit_blend_state(r300, r300->blend_state); + r300->dirty_state &= ~R300_NEW_BLEND; } if (r300->dirty_state & R300_NEW_BLEND_COLOR) { r300_emit_blend_color_state(r300, r300->blend_color_state); + r300->dirty_state &= ~R300_NEW_BLEND_COLOR; } if (r300->dirty_state & R300_NEW_DSA) { r300_emit_dsa_state(r300, r300->dsa_state); + r300->dirty_state &= ~R300_NEW_DSA; + } + + if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + if (r300screen->caps->is_r500) { + r500_emit_fragment_shader(r300, + (struct r500_fragment_shader*)r300->fs); + } else { + r300_emit_fragment_shader(r300, + (struct r300_fragment_shader*)r300->fs); + } + r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; + } + + if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) { + r300_emit_fb_state(r300, &r300->framebuffer_state); + r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } if (r300->dirty_state & R300_NEW_RASTERIZER) { r300_emit_rs_state(r300, r300->rs_state); + r300->dirty_state &= ~R300_NEW_RASTERIZER; + } + + if (r300->dirty_state & R300_NEW_RS_BLOCK) { + r300_emit_rs_block_state(r300, r300->rs_block); + r300->dirty_state &= ~R300_NEW_RS_BLOCK; + } + + if (r300->dirty_state & R300_ANY_NEW_SAMPLERS) { + for (i = 0; i < r300->sampler_count; i++) { + if (r300->dirty_state & (R300_NEW_SAMPLER << i)) { + r300_emit_sampler(r300, r300->sampler_states[i], i); + r300->dirty_state &= ~(R300_NEW_SAMPLER << i); + dirty_tex++; + } + } } if (r300->dirty_state & R300_NEW_SCISSOR) { - struct r300_scissor_state* scissor = r300->scissor_state; - /* XXX next two are contiguous regs */ - OUT_CS_REG(R300_SC_SCISSORS_TL, scissor->scissor_top_left); - OUT_CS_REG(R300_SC_SCISSORS_BR, scissor->scissor_bottom_right); + r300_emit_scissor_state(r300, r300->scissor_state); + r300->dirty_state &= ~R300_NEW_SCISSOR; } - r300->dirty_state = 0; + if (r300->dirty_state & R300_ANY_NEW_TEXTURES) { + for (i = 0; i < r300->texture_count; i++) { + if (r300->dirty_state & (R300_NEW_TEXTURE << i)) { + r300_emit_texture(r300, r300->textures[i], i); + r300->dirty_state &= ~(R300_NEW_TEXTURE << i); + dirty_tex++; + } + } + } + + if (r300->dirty_state & R300_NEW_VIEWPORT) { + r300_emit_viewport_state(r300, r300->viewport_state); + r300->dirty_state &= ~R300_NEW_VIEWPORT; + } + + if (dirty_tex) { + r300_flush_textures(r300); + } + + if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { + r300_emit_vertex_format_state(r300); + r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; + } } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index b6e69386f95..4aba1ee08ce 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -20,6 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#ifndef R300_EMIT_H +#define R300_EMIT_H + +#include "util/u_math.h" + #include "r300_context.h" #include "r300_cs.h" #include "r300_screen.h" @@ -33,4 +38,26 @@ void r300_emit_blend_color_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); +void r300_emit_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs); + +void r500_emit_fragment_shader(struct r300_context* r300, + struct r500_fragment_shader* fs); + +void r300_emit_fb_state(struct r300_context* r300, + struct pipe_framebuffer_state* fb); + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); + +void r300_emit_rs_block_state(struct r300_context* r300, + struct r300_rs_block* rs); + +void r300_emit_scissor_state(struct r300_context* r300, + struct r300_scissor_state* scissor); + +void r300_emit_vertex_format_state(struct r300_context* r300); + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300); + +#endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 3766f0a0a7b..20ca6905ad2 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -31,6 +31,7 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { FLUSH_CS; + r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c new file mode 100644 index 00000000000..5f5f4c4dbd4 --- /dev/null +++ b/src/gallium/drivers/r300/r300_query.c @@ -0,0 +1,111 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_query.h" + +static struct pipe_query* r300_create_query(struct pipe_context* pipe, + unsigned query_type) +{ + struct r300_query* q = CALLOC_STRUCT(r300_query); + + q->type = query_type; + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* XXX this is to force winsys to give us a GTT buffer */ + q->buf = pipe->screen->buffer_create(pipe->screen, 64, + PIPE_BUFFER_USAGE_VERTEX, 64); + + return (struct pipe_query*)q; +} + +static void r300_destroy_query(struct pipe_context* pipe, + struct pipe_query* query) +{ + FREE(query); +} + +static void r300_begin_query(struct pipe_context* pipe, + struct pipe_query* query) +{ + struct r300_context* r300 = r300_context(pipe); + struct r300_query* q = (struct r300_query*)query; + CS_LOCALS(r300); + + uint32_t* map = pipe_buffer_map(pipe->screen, q->buf, + PIPE_BUFFER_USAGE_CPU_WRITE); + *map = ~0; + pipe_buffer_unmap(pipe->screen, q->buf); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); + END_CS; +} + +static void r300_end_query(struct pipe_context* pipe, + struct pipe_query* query) +{ + struct r300_context* r300 = r300_context(pipe); + struct r300_query* q = (struct r300_query*)query; + CS_LOCALS(r300); + + BEGIN_CS(4); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(q->buf, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_CS; +} + +static boolean r300_get_query_result(struct pipe_context* pipe, + struct pipe_query* query, + boolean wait, + uint64_t* result) +{ + struct r300_query* q = (struct r300_query*)query; + uint32_t temp; + + if (wait) { + /* Well, we're expected to just sit here and spin, so let's go ahead + * and flush so we can be sure that the card's spinning... */ + /* XXX double-check these params */ + pipe->flush(pipe, 0, NULL); + } + + uint32_t* map = pipe_buffer_map(pipe->screen, q->buf, + PIPE_BUFFER_USAGE_CPU_READ); + temp = *map; + pipe_buffer_unmap(pipe->screen, q->buf); + + if (temp < 0) { + /* Our results haven't been written yet... */ + return FALSE; + } + + *result = temp; + return TRUE; +} + +void r300_init_query_functions(struct r300_context* r300) { + r300->context.create_query = r300_create_query; + r300->context.destroy_query = r300_destroy_query; + r300->context.begin_query = r300_begin_query; + r300->context.end_query = r300_end_query; + r300->context.get_query_result = r300_get_query_result; +} diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h new file mode 100644 index 00000000000..4f447ea45b8 --- /dev/null +++ b/src/gallium/drivers/r300/r300_query.h @@ -0,0 +1,44 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_QUERY_H +#define R300_QUERY_H + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_reg.h" + +struct r300_query { + /* The kind of query. Currently only OQ is supported. */ + unsigned type; + /* Buffer object where we want our results to reside. */ + struct pipe_buffer* buf; +}; + +static INLINE struct r300_query* r300_query(struct pipe_query* q) +{ + return (struct r300_query*)q; +} + +void r300_init_query_functions(struct r300_context* r300); + +#endif /* R300_QUERY_H */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index dbd0cc28e23..6f3ad970abc 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -64,7 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_SE_VPORT_ZSCALE 0x1DA8 #define R300_SE_VPORT_ZOFFSET 0x1DAC - +#define R300_VAP_PORT_IDX0 0x2040 /* * Vertex Array Processing (VAP) Control */ @@ -139,17 +139,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 # define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 -#define R300_SE_VTE_CNTL 0x20b0 -# define R300_VPORT_X_SCALE_ENA (1 << 0) -# define R300_VPORT_X_OFFSET_ENA (1 << 1) -# define R300_VPORT_Y_SCALE_ENA (1 << 2) -# define R300_VPORT_Y_OFFSET_ENA (1 << 3) -# define R300_VPORT_Z_SCALE_ENA (1 << 4) -# define R300_VPORT_Z_OFFSET_ENA (1 << 5) -# define R300_VTX_XY_FMT (1 << 8) -# define R300_VTX_Z_FMT (1 << 9) -# define R300_VTX_W0_FMT (1 << 10) -# define R300_SERIAL_PROC_ENA (1 << 11) +#define R300_VAP_VPORT_XSCALE 0x2098 +#define R300_VAP_VPORT_XOFFSET 0x209c +#define R300_VAP_VPORT_YSCALE 0x20a0 +#define R300_VAP_VPORT_YOFFSET 0x20a4 +#define R300_VAP_VPORT_ZSCALE 0x20a8 +#define R300_VAP_VPORT_ZOFFSET 0x20ac + +#define R300_VAP_VTE_CNTL 0x20b0 +#define R300_SE_VTE_CNTL R300_VAP_VTE_CNTL +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) #define R300_VAP_VTX_SIZE 0x20b4 @@ -326,6 +334,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_WRITE_ENA_Z 4 # define R300_WRITE_ENA_W 8 # define R300_SWIZZLE1_SHIFT 16 + +# define R300_VAP_SWIZZLE_XYZW \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + #define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 #define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 #define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec @@ -732,8 +748,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 #define R500_RS_IP_COL_PTR_SHIFT 24 #define R500_RS_IP_COL_FMT_SHIFT 27 -# define R500_RS_COL_PTR(x) (x << 24) -# define R500_RS_COL_FMT(x) (x << 27) +# define R500_RS_SEL_S(x) ((x) << 0) +# define R500_RS_SEL_T(x) ((x) << 6) +# define R500_RS_SEL_R(x) ((x) << 12) +# define R500_RS_SEL_Q(x) ((x) << 18) +# define R500_RS_COL_PTR(x) ((x) << 24) +# define R500_RS_COL_FMT(x) ((x) << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) #define R500_RS_IP_OFFSET_EN (1 << 31) @@ -1019,20 +1039,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16) # define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16) -/** TODO: might be candidate for removal */ -# define R300_RE_SHADE_MODEL_SMOOTH ( \ - R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) -/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ -# define R300_RE_SHADE_MODEL_FLAT ( \ - R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) +# define R300_SHADE_MODEL_FLAT ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + +# define R300_SHADE_MODEL_SMOOTH ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) /* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ #define R300_GA_SOLID_RG 0x427c @@ -1146,6 +1173,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_W_ADDR_MASK 0x0003f000 # define R300_HIRES_DIS (0 << 18) # define R300_HIRES_EN (1 << 18) +# define R300_IT_COUNT(x) ((x) << 0) +# define R300_IC_COUNT(x) ((x) << 7) +# define R300_W_COUNT(x) ((x) << 12) #define R300_RS_INST_COUNT 0x4304 # define R300_RS_INST_COUNT_SHIFT 0 @@ -1175,8 +1205,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ # define R300_RS_TEX_PTR(x) (x << 0) -# define R300_RS_COL_PTR(x) (x << 6) -# define R300_RS_COL_FMT(x) (x << 9) +# define R300_RS_COL_PTR(x) ((x) << 6) +# define R300_RS_COL_FMT(x) ((x) << 9) # define R300_RS_COL_FMT_RGBA 0 # define R300_RS_COL_FMT_RGB0 1 # define R300_RS_COL_FMT_RGB1 2 @@ -1186,10 +1216,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_COL_FMT_111A 8 # define R300_RS_COL_FMT_1110 9 # define R300_RS_COL_FMT_1111 10 -# define R300_RS_SEL_S(x) (x << 13) -# define R300_RS_SEL_T(x) (x << 16) -# define R300_RS_SEL_R(x) (x << 19) -# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_S(x) ((x) << 13) +# define R300_RS_SEL_T(x) ((x) << 16) +# define R300_RS_SEL_R(x) ((x) << 19) +# define R300_RS_SEL_Q(x) ((x) << 22) # define R300_RS_SEL_C0 0 # define R300_RS_SEL_C1 1 # define R300_RS_SEL_C2 2 @@ -1216,14 +1246,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_INST_14 0x4358 #define R500_RS_INST_15 0x435c #define R500_RS_INST_TEX_ID_SHIFT 0 +# define R500_RS_INST_TEX_ID(x) ((x) << 0) #define R500_RS_INST_TEX_CN_WRITE (1 << 4) #define R500_RS_INST_TEX_ADDR_SHIFT 5 +# define R500_RS_INST_TEX_ADDR(x) ((x) << 0) #define R500_RS_INST_COL_ID_SHIFT 12 +# define R500_RS_INST_COL_ID(x) ((x) << 12) #define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) #define R500_RS_INST_COL_CN_WRITE (1 << 16) #define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) #define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) #define R500_RS_INST_COL_ADDR_SHIFT 18 +# define R500_RS_INST_COL_ADDR(x) ((x) << 18) #define R500_RS_INST_TEX_ADJ (1 << 25) #define R500_RS_INST_W_CN (1 << 26) @@ -1240,9 +1274,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RS_INST_7 0x434C # define R300_RS_INST_TEX_ID(x) ((x) << 0) # define R300_RS_INST_TEX_CN_WRITE (1 << 3) +# define R300_RS_INST_TEX_ADDR(x) ((x) << 6) # define R300_RS_INST_TEX_ADDR_SHIFT 6 # define R300_RS_INST_COL_ID(x) ((x) << 11) # define R300_RS_INST_COL_CN_WRITE (1 << 14) +# define R300_RS_INST_COL_ADDR(x) ((x) << 17) # define R300_RS_INST_COL_ADDR_SHIFT 17 # define R300_RS_INST_TEX_ADJ (1 << 22) # define R300_RS_COL_BIAS_UNUSED_SHIFT 23 @@ -1411,18 +1447,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_MACRO_SWITCH (1<<22) # define R500_BORDER_FIX (1<<31) -#define R300_TX_SIZE_0 0x4480 +#define R300_TX_FORMAT0_0 0x4480 # define R300_TX_WIDTHMASK_SHIFT 0 # define R300_TX_WIDTHMASK_MASK (2047 << 0) # define R300_TX_HEIGHTMASK_SHIFT 11 # define R300_TX_HEIGHTMASK_MASK (2047 << 11) -# define R300_TX_DEPTHMASK_SHIFT 22 -# define R300_TX_DEPTHMASK_MASK (0xf << 22) +# define R300_TX_DEPTHMASK_SHIFT 22 +# define R300_TX_DEPTHMASK_MASK (0xf << 22) # define R300_TX_MAX_MIP_LEVEL_SHIFT 26 # define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) -# define R300_TX_SIZE_PROJECTED (1<<30) -# define R300_TX_SIZE_TXPITCH_EN (1<<31) -#define R300_TX_FORMAT_0 0x44C0 +# define R300_TX_SIZE_PROJECTED (1 << 30) +# define R300_TX_PITCH_EN (1 << 31) +# define R300_TX_WIDTH(x) ((x) << 0) +# define R300_TX_HEIGHT(x) ((x) << 11) + +#define R300_TX_FORMAT1_0 0x44C0 /* The interpretation of the format word by Wladimir van der Laan */ /* The X, Y, Z and W refer to the layout of the components. They are given meanings as R, G, B and Alpha by the swizzle @@ -1708,7 +1747,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_C3_SEL_R (1 << 14) # define R300_C3_SEL_G (2 << 14) # define R300_C3_SEL_B (3 << 14) -# define R300_OUT_SIGN(x) (x << 16) +# define R300_OUT_SIGN(x) ((x) << 16) +# define R500_ROUND_ADJ (1 << 20) /* ALU * The ALU instructions register blocks are enumerated according to the order @@ -1795,6 +1835,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTC_OUTPUT_X (1 << 26) # define R300_ALU_DSTC_OUTPUT_Y (1 << 27) # define R300_ALU_DSTC_OUTPUT_Z (1 << 28) +# define R300_ALU_DSTC_OUTPUT_XYZ (7 << 26) +# define R300_RGB_ADDR0(x) ((x) << 0) +# define R300_RGB_ADDR1(x) ((x) << 6) +# define R300_RGB_ADDR2(x) ((x) << 12) #define R300_US_ALU_ALPHA_ADDR_0 0x47C0 # define R300_ALU_SRC0A_SHIFT 0 @@ -1812,6 +1856,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTA_REG (1 << 23) # define R300_ALU_DSTA_OUTPUT (1 << 24) # define R300_ALU_DSTA_DEPTH (1 << 27) +# define R300_ALPHA_ADDR0(x) ((x) << 0) +# define R300_ALPHA_ADDR1(x) ((x) << 6) +# define R300_ALPHA_ADDR2(x) ((x) << 12) #define R300_US_ALU_RGB_INST_0 0x48C0 # define R300_ALU_ARGC_SRC0C_XYZ 0 @@ -1846,6 +1893,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_ARGC_SRC0CA_WZY 29 # define R300_ALU_ARGC_SRC1CA_WZY 30 # define R300_ALU_ARGC_SRC2CA_WZY 31 +# define R300_RGB_SWIZA(x) ((x) << 0) +# define R300_RGB_SWIZB(x) ((x) << 7) +# define R300_RGB_SWIZC(x) ((x) << 14) # define R300_ALU_ARG0C_SHIFT 0 # define R300_ALU_ARG0C_MASK (31 << 0) @@ -1909,10 +1959,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_ARGA_SRCP_Y 13 # define R300_ALU_ARGA_SRCP_Z 14 # define R300_ALU_ARGA_SRCP_W 15 - # define R300_ALU_ARGA_ZERO 16 # define R300_ALU_ARGA_ONE 17 # define R300_ALU_ARGA_HALF 18 +# define R300_ALPHA_SWIZA(x) ((x) << 0) +# define R300_ALPHA_SWIZB(x) ((x) << 7) +# define R300_ALPHA_SWIZC(x) ((x) << 14) + # define R300_ALU_ARG0A_SHIFT 0 # define R300_ALU_ARG0A_MASK (31 << 0) # define R300_ALU_ARG0A_NOP (0 << 5) @@ -2731,7 +2784,7 @@ enum { # define R500_ALPHA_OP_COS 13 # define R500_ALPHA_OP_MDH 14 # define R500_ALPHA_OP_MDV 15 -# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD(x) ((x) << 4) # define R500_ALPHA_ADDRD_REL (1 << 11) # define R500_ALPHA_SEL_A_SHIFT 12 # define R500_ALPHA_SEL_A_SRC0 (0 << 12) @@ -2775,16 +2828,16 @@ enum { # define R500_ALPHA_OMOD_DIV_4 (5 << 26) # define R500_ALPHA_OMOD_DIV_8 (6 << 26) # define R500_ALPHA_OMOD_DISABLE (7 << 26) -# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_TARGET(x) ((x) << 29) # define R500_ALPHA_W_OMASK (1 << 31) #define R500_US_ALU_ALPHA_ADDR_0 0x9800 -# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0(x) ((x) << 0) # define R500_ALPHA_ADDR0_CONST (1 << 8) # define R500_ALPHA_ADDR0_REL (1 << 9) -# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1(x) ((x) << 10) # define R500_ALPHA_ADDR1_CONST (1 << 18) # define R500_ALPHA_ADDR1_REL (1 << 19) -# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2(x) ((x) << 20) # define R500_ALPHA_ADDR2_CONST (1 << 28) # define R500_ALPHA_ADDR2_REL (1 << 29) # define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) @@ -2805,7 +2858,7 @@ enum { # define R500_ALU_RGBA_OP_SOP (10 << 0) # define R500_ALU_RGBA_OP_MDH (11 << 0) # define R500_ALU_RGBA_OP_MDV (12 << 0) -# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) # define R500_ALU_RGBA_ADDRD_REL (1 << 11) # define R500_ALU_RGBA_SEL_C_SHIFT 12 # define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) @@ -2932,16 +2985,16 @@ enum { # define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) # define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) # define R500_ALU_RGB_OMOD_DISABLE (7 << 26) -# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_TARGET(x) ((x) << 29) # define R500_ALU_RGB_WMASK (1 << 31) #define R500_US_ALU_RGB_ADDR_0 0x9000 -# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0(x) ((x) << 0) # define R500_RGB_ADDR0_CONST (1 << 8) # define R500_RGB_ADDR0_REL (1 << 9) -# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1(x) ((x) << 10) # define R500_RGB_ADDR1_CONST (1 << 18) # define R500_RGB_ADDR1_REL (1 << 19) -# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2(x) ((x) << 20) # define R500_RGB_ADDR2_CONST (1 << 28) # define R500_RGB_ADDR2_REL (1 << 29) # define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) @@ -2973,6 +3026,7 @@ enum { # define R500_INST_RGB_OMASK_R (1 << 15) # define R500_INST_RGB_OMASK_G (1 << 16) # define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_RGB_OMASK_RGB (7 << 15) # define R500_INST_ALPHA_OMASK (1 << 18) # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) @@ -2996,19 +3050,19 @@ enum { /* note that these are 8 bit lengths, despite the offsets, at least for R500 */ #define R500_US_CODE_ADDR 0x4630 -# define R500_US_CODE_START_ADDR(x) (x << 0) -# define R500_US_CODE_END_ADDR(x) (x << 16) +# define R500_US_CODE_START_ADDR(x) ((x) << 0) +# define R500_US_CODE_END_ADDR(x) ((x) << 16) #define R500_US_CODE_OFFSET 0x4638 -# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) #define R500_US_CODE_RANGE 0x4634 -# define R500_US_CODE_RANGE_ADDR(x) (x << 0) -# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) +# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) #define R500_US_CONFIG 0x4600 # define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) #define R500_US_FC_ADDR_0 0xa000 -# define R500_FC_BOOL_ADDR(x) (x << 0) -# define R500_FC_INT_ADDR(x) (x << 8) -# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_BOOL_ADDR(x) ((x) << 0) +# define R500_FC_INT_ADDR(x) ((x) << 8) +# define R500_FC_JUMP_ADDR(x) ((x) << 16) # define R500_FC_JUMP_GLOBAL (1 << 31) #define R500_US_FC_BOOL_CONST 0x4620 # define R500_FC_KBOOL(x) (x) @@ -3029,8 +3083,8 @@ enum { # define R500_FC_A_OP_NONE (0 << 6) # define R500_FC_A_OP_POP (1 << 6) # define R500_FC_A_OP_PUSH (2 << 6) -# define R500_FC_JUMP_FUNC(x) (x << 8) -# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_JUMP_FUNC(x) ((x) << 8) +# define R500_FC_B_POP_CNT(x) ((x) << 16) # define R500_FC_B_OP0_NONE (0 << 24) # define R500_FC_B_OP0_DECR (1 << 24) # define R500_FC_B_OP0_INCR (2 << 24) @@ -3039,60 +3093,18 @@ enum { # define R500_FC_B_OP1_INCR (2 << 26) # define R500_FC_IGNORE_UNCOVERED (1 << 28) #define R500_US_FC_INT_CONST_0 0x4c00 -# define R500_FC_INT_CONST_KR(x) (x << 0) -# define R500_FC_INT_CONST_KG(x) (x << 8) -# define R500_FC_INT_CONST_KB(x) (x << 16) +# define R500_FC_INT_CONST_KR(x) ((x) << 0) +# define R500_FC_INT_CONST_KG(x) ((x) << 8) +# define R500_FC_INT_CONST_KB(x) ((x) << 16) /* _0 through _15 */ #define R500_US_FORMAT0_0 0x4640 -# define R500_FORMAT_TXWIDTH(x) (x << 0) -# define R500_FORMAT_TXHEIGHT(x) (x << 11) -# define R500_FORMAT_TXDEPTH(x) (x << 22) -/* _0 through _3 */ -#define R500_US_OUT_FMT_0 0x46A4 -# define R500_OUT_FMT_C4_8 (0 << 0) -# define R500_OUT_FMT_C4_10 (1 << 0) -# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) -# define R500_OUT_FMT_C_16 (3 << 0) -# define R500_OUT_FMT_C2_16 (4 << 0) -# define R500_OUT_FMT_C4_16 (5 << 0) -# define R500_OUT_FMT_C_16_MPEG (6 << 0) -# define R500_OUT_FMT_C2_16_MPEG (7 << 0) -# define R500_OUT_FMT_C2_4 (8 << 0) -# define R500_OUT_FMT_C_3_3_2 (9 << 0) -# define R500_OUT_FMT_C_6_5_6 (10 << 0) -# define R500_OUT_FMT_C_11_11_10 (11 << 0) -# define R500_OUT_FMT_C_10_11_11 (12 << 0) -# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) -/* #define R500_OUT_FMT_RESERVED (14 << 0) */ -# define R500_OUT_FMT_UNUSED (15 << 0) -# define R500_OUT_FMT_C_16_FP (16 << 0) -# define R500_OUT_FMT_C2_16_FP (17 << 0) -# define R500_OUT_FMT_C4_16_FP (18 << 0) -# define R500_OUT_FMT_C_32_FP (19 << 0) -# define R500_OUT_FMT_C2_32_FP (20 << 0) -# define R500_OUT_FMT_C4_32_FP (21 << 0) -# define R500_C0_SEL_A (0 << 8) -# define R500_C0_SEL_R (1 << 8) -# define R500_C0_SEL_G (2 << 8) -# define R500_C0_SEL_B (3 << 8) -# define R500_C1_SEL_A (0 << 10) -# define R500_C1_SEL_R (1 << 10) -# define R500_C1_SEL_G (2 << 10) -# define R500_C1_SEL_B (3 << 10) -# define R500_C2_SEL_A (0 << 12) -# define R500_C2_SEL_R (1 << 12) -# define R500_C2_SEL_G (2 << 12) -# define R500_C2_SEL_B (3 << 12) -# define R500_C3_SEL_A (0 << 14) -# define R500_C3_SEL_R (1 << 14) -# define R500_C3_SEL_G (2 << 14) -# define R500_C3_SEL_B (3 << 14) -# define R500_OUT_SIGN(x) (x << 16) -# define R500_ROUND_ADJ (1 << 20) +# define R500_FORMAT_TXWIDTH(x) ((x) << 0) +# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) +# define R500_FORMAT_TXDEPTH(x) ((x) << 22) #define R500_US_PIXSIZE 0x4604 # define R500_PIX_SIZE(x) (x) #define R500_US_TEX_ADDR_0 0x9800 -# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR(x) ((x) << 0) # define R500_TEX_SRC_ADDR_REL (1 << 7) # define R500_TEX_SRC_S_SWIZ_R (0 << 8) # define R500_TEX_SRC_S_SWIZ_G (1 << 8) @@ -3110,7 +3122,7 @@ enum { # define R500_TEX_SRC_Q_SWIZ_G (1 << 14) # define R500_TEX_SRC_Q_SWIZ_B (2 << 14) # define R500_TEX_SRC_Q_SWIZ_A (3 << 14) -# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR(x) ((x) << 16) # define R500_TEX_DST_ADDR_REL (1 << 23) # define R500_TEX_DST_R_SWIZ_R (0 << 24) # define R500_TEX_DST_R_SWIZ_G (1 << 24) @@ -3129,7 +3141,7 @@ enum { # define R500_TEX_DST_A_SWIZ_B (2 << 30) # define R500_TEX_DST_A_SWIZ_A (3 << 30) #define R500_US_TEX_ADDR_DXDY_0 0xa000 -# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR(x) ((x) << 0) # define R500_DX_ADDR_REL (1 << 7) # define R500_DX_S_SWIZ_R (0 << 8) # define R500_DX_S_SWIZ_G (1 << 8) @@ -3147,7 +3159,7 @@ enum { # define R500_DX_Q_SWIZ_G (1 << 14) # define R500_DX_Q_SWIZ_B (2 << 14) # define R500_DX_Q_SWIZ_A (3 << 14) -# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR(x) ((x) << 16) # define R500_DY_ADDR_REL (1 << 17) # define R500_DY_S_SWIZ_R (0 << 24) # define R500_DY_S_SWIZ_G (1 << 24) @@ -3166,7 +3178,7 @@ enum { # define R500_DY_Q_SWIZ_B (2 << 30) # define R500_DY_Q_SWIZ_A (3 << 30) #define R500_US_TEX_INST_0 0x9000 -# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_ID(x) ((x) << 16) # define R500_TEX_INST_NOP (0 << 22) # define R500_TEX_INST_LD (1 << 22) # define R500_TEX_INST_TEXKILL (2 << 22) @@ -3227,9 +3239,9 @@ enum { #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 #define R300_PACKET3_INDX_BUFFER 0x00003300 -# define R300_EB_UNK1_SHIFT 24 -# define R300_EB_UNK1 (0x80<<24) -# define R300_EB_UNK2 0x0810 +# define R300_INDX_BUFFER_DST_SHIFT 0 +# define R300_INDX_BUFFER_SKIP_SHIFT 16 +# define R300_INDX_BUFFER_ONE_REG_WR (1<<31) /* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ #define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8ed66a1660c..d2c5998c261 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -50,6 +50,7 @@ static const char* chip_families[] = { "RC410", "RS480", "RS482", + "RS600", "RS690", "RS740", "RV515", @@ -100,11 +101,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) /* IN THEORY */ return 0; case PIPE_CAP_MAX_RENDER_TARGETS: - /* XXX 4 eventually */ - return 1; + return 4; case PIPE_CAP_OCCLUSION_QUERY: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: /* IN THEORY */ return 0; @@ -121,7 +120,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) * shows why this is silly. Assuming RGBA, 4cpp, we can see that * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly * practical. However, if at some point a game really wants this, - * then we can remove this limit. */ + * then we can remove or raise this limit. */ if (r300screen->caps->is_r500) { /* 9 == 256x256x256 */ return 9; @@ -142,7 +141,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* XXX guessing */ + /* XXX guessing (what a terrible guess) */ return 2; default: debug_printf("r300: Implementation error: Bad param %d\n", @@ -175,15 +174,44 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -/* XXX moar formats */ -static boolean check_tex_2d_format(enum pipe_format format) +static boolean check_tex_2d_format(enum pipe_format format, boolean is_r500) { switch (format) { + /* Colorbuffer */ + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: + /* Colorbuffer or texture */ case PIPE_FORMAT_I8_UNORM: + /* Z buffer */ + case PIPE_FORMAT_Z16_UNORM: + /* Z buffer with stencil */ + case PIPE_FORMAT_Z24S8_UNORM: return TRUE; + + /* XXX These don't even exist + case PIPE_FORMAT_A32R32G32B32: + case PIPE_FORMAT_A16R16G16B16: */ + /* XXX Insert YUV422 packed VYUY and YVYU here */ + /* XXX What the deuce is UV88? (r3xx accel page 14) + debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", + pf_name(format), __FUNCTION__); + return FALSE; */ + + /* XXX Supported yet unimplemented r5xx formats: */ + /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */ + /* XXX Even more that don't exist + case PIPE_FORMAT_A10R10G10B10_UNORM: + case PIPE_FORMAT_A2R10G10B10_UNORM: + case PIPE_FORMAT_I10_UNORM: + debug_printf( + "r300: Warning: Got unimplemented r500 format: %s in %s\n", + pf_name(format), __FUNCTION__); + return FALSE; */ + default: - debug_printf("r300: Warning: Got unknown format: %s, in %s\n", + debug_printf("r300: Warning: Got unsupported format: %s in %s\n", pf_name(format), __FUNCTION__); break; } @@ -200,7 +228,8 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, { switch (target) { case PIPE_TEXTURE_2D: - return check_tex_2d_format(format); + return check_tex_2d_format(format, + r300_screen(pscreen)->caps->is_r500); default: debug_printf("r300: Warning: Got unknown format target: %d\n", format); @@ -210,24 +239,84 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, return FALSE; } -static void* r300_surface_map(struct pipe_screen* screen, - struct pipe_surface* surface, - unsigned flags) +static struct pipe_transfer* +r300_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_texture *tex = (struct r300_texture *)texture; + struct r300_transfer *trans; + unsigned offset; /* in bytes */ + + /* XXX Add support for these things */ + if (texture->target == PIPE_TEXTURE_CUBE) { + debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n"); + /* offset = tex->image_offset[level][face]; */ + } + else if (texture->target == PIPE_TEXTURE_3D) { + debug_printf("PIPE_TEXTURE_3D is not yet supported.\n"); + /* offset = tex->image_offset[level][zslice]; */ + } + else { + offset = tex->offset[level]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(r300_transfer); + if (trans) { + pipe_texture_reference(&trans->transfer.texture, texture); + trans->transfer.format = trans->transfer.format; + trans->transfer.width = w; + trans->transfer.height = h; + trans->transfer.block = texture->block; + trans->transfer.nblocksx = texture->nblocksx[level]; + trans->transfer.nblocksy = texture->nblocksy[level]; + trans->transfer.stride = tex->stride; + trans->transfer.usage = usage; + trans->offset = offset; + } + return &trans->transfer; +} + +static void +r300_tex_transfer_destroy(struct pipe_transfer *trans) { - struct r300_texture* tex = (struct r300_texture*)surface->texture; - char* map = pipe_buffer_map(screen, tex->buffer, flags); + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + +static void* r300_transfer_map(struct pipe_screen* screen, + struct pipe_transfer* transfer) +{ + struct r300_texture* tex = (struct r300_texture*)transfer->texture; + char* map; + unsigned flags = 0; + + if (transfer->usage != PIPE_TRANSFER_WRITE) { + flags |= PIPE_BUFFER_USAGE_CPU_READ; + } + if (transfer->usage != PIPE_TRANSFER_READ) { + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; + } + + map = pipe_buffer_map(screen, tex->buffer, flags); if (!map) { return NULL; } - return map + surface->offset; + return map + r300_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; } -static void r300_surface_unmap(struct pipe_screen* screen, - struct pipe_surface* surface) +static void r300_transfer_unmap(struct pipe_screen* screen, + struct pipe_transfer* transfer) { - struct r300_texture* tex = (struct r300_texture*)surface->texture; + struct r300_texture* tex = (struct r300_texture*)transfer->texture; pipe_buffer_unmap(screen, tex->buffer); } @@ -239,8 +328,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, - struct r300_winsys* r300_winsys) +struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) { struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); @@ -254,15 +342,17 @@ struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->screen.winsys = winsys; + r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; r300screen->screen.get_param = r300_get_param; r300screen->screen.get_paramf = r300_get_paramf; r300screen->screen.is_format_supported = r300_is_format_supported; - r300screen->screen.surface_map = r300_surface_map; - r300screen->screen.surface_unmap = r300_surface_unmap; + r300screen->screen.get_tex_transfer = r300_get_tex_transfer; + r300screen->screen.tex_transfer_destroy = r300_tex_transfer_destroy; + r300screen->screen.transfer_map = r300_transfer_map; + r300screen->screen.transfer_unmap = r300_transfer_unmap; r300_init_screen_texture_functions(&r300screen->screen); u_simple_screen_init(&r300screen->screen); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2e25f61dbf1..3f52dbc3bea 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -40,13 +40,27 @@ struct r300_screen { struct r300_capabilities* caps; }; +struct r300_transfer { + /* Parent class */ + struct pipe_transfer transfer; + + /* Offset from start of buffer. */ + unsigned offset; +}; + /* Convenience cast wrapper. */ static struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } +/* Convenience cast wrapper. */ +static INLINE struct r300_transfer* +r300_transfer(struct pipe_transfer* transfer) +{ + return (struct r300_transfer*)transfer; +} + /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, - struct r300_winsys* r300_winsys); +struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys); #endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9392d723427..58bce22fc81 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -22,92 +22,17 @@ #include "util/u_math.h" #include "util/u_pack_color.h" -#include "pipe/p_debug.h" + +#include "util/u_debug.h" +#include "pipe/internal/p_winsys_screen.h" #include "r300_context.h" #include "r300_reg.h" +#include "r300_state_inlines.h" +#include "r300_state_shader.h" /* r300_state: Functions used to intialize state context by translating - * Gallium state objects into semi-native r300 state objects. - * - * XXX break this file up into pieces if it gets too big! */ - -/* Pack a float into a dword. */ -static uint32_t pack_float_32(float f) -{ - union { - float f; - uint32_t u; - } u; - - u.f = f; - return u.u; -} - -static uint32_t translate_blend_function(int blend_func) { - switch (blend_func) { - case PIPE_BLEND_ADD: - return R300_COMB_FCN_ADD_CLAMP; - case PIPE_BLEND_SUBTRACT: - return R300_COMB_FCN_SUB_CLAMP; - case PIPE_BLEND_REVERSE_SUBTRACT: - return R300_COMB_FCN_RSUB_CLAMP; - case PIPE_BLEND_MIN: - return R300_COMB_FCN_MIN; - case PIPE_BLEND_MAX: - return R300_COMB_FCN_MAX; - default: - debug_printf("r300: Unknown blend function %d\n", blend_func); - break; - } - return 0; -} - -/* XXX we can also offer the D3D versions of some of these... */ -static uint32_t translate_blend_factor(int blend_fact) { - switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return R300_BLEND_GL_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return R300_BLEND_GL_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return R300_BLEND_GL_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return R300_BLEND_GL_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return R300_BLEND_GL_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return R300_BLEND_GL_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return R300_BLEND_GL_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return R300_BLEND_GL_CONST_ALPHA; - /* XXX WTF are these? - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: */ - case PIPE_BLENDFACTOR_ZERO: - return R300_BLEND_GL_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return R300_BLEND_GL_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; - /* XXX see above - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ - default: - debug_printf("r300: Unknown blend factor %d\n", blend_fact); - break; - } - return 0; -} + * Gallium state objects into semi-native r300 state objects. */ /* Create a new blend state based on the CSO blend state. * @@ -123,16 +48,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, blend->blend_control = R300_ALPHA_BLEND_ENABLE | R300_SEPARATE_ALPHA_ENABLE | R300_READ_ENABLE | - translate_blend_function(state->rgb_func) | - (translate_blend_factor(state->rgb_src_factor) << + r300_translate_blend_function(state->rgb_func) | + (r300_translate_blend_factor(state->rgb_src_factor) << R300_SRC_BLEND_SHIFT) | - (translate_blend_factor(state->rgb_dst_factor) << + (r300_translate_blend_factor(state->rgb_dst_factor) << R300_DST_BLEND_SHIFT); blend->alpha_blend_control = - translate_blend_function(state->alpha_func) | - (translate_blend_factor(state->alpha_src_factor) << + r300_translate_blend_function(state->alpha_func) | + (r300_translate_blend_factor(state->alpha_src_factor) << R300_SRC_BLEND_SHIFT) | - (translate_blend_factor(state->alpha_dst_factor) << + (r300_translate_blend_factor(state->alpha_dst_factor) << R300_DST_BLEND_SHIFT); } @@ -175,21 +100,17 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - uint32_t r, g, b, a; ubyte ur, ug, ub, ua; - r = util_iround(color->color[0] * 1023.0f); - g = util_iround(color->color[1] * 1023.0f); - b = util_iround(color->color[2] * 1023.0f); - a = util_iround(color->color[3] * 1023.0f); - ur = float_to_ubyte(color->color[0]); ug = float_to_ubyte(color->color[1]); ub = float_to_ubyte(color->color[2]); ua = float_to_ubyte(color->color[3]); - r300->blend_color_state->blend_color = (a << 24) | (r << 16) | (g << 8) | b; + util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, + &r300->blend_color_state->blend_color); + /* XXX this is wrong */ r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); @@ -210,83 +131,24 @@ static void uint shader, uint index, const struct pipe_constant_buffer* buffer) { - /* XXX */ -} - -static uint32_t translate_depth_stencil_function(int zs_func) { - switch (zs_func) { - case PIPE_FUNC_NEVER: - return R300_ZS_NEVER; - case PIPE_FUNC_LESS: - return R300_ZS_LESS; - case PIPE_FUNC_EQUAL: - return R300_ZS_EQUAL; - case PIPE_FUNC_LEQUAL: - return R300_ZS_LEQUAL; - case PIPE_FUNC_GREATER: - return R300_ZS_GREATER; - case PIPE_FUNC_NOTEQUAL: - return R300_ZS_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return R300_ZS_GEQUAL; - case PIPE_FUNC_ALWAYS: - return R300_ZS_ALWAYS; - default: - debug_printf("r300: Unknown depth/stencil function %d\n", - zs_func); - break; - } - return 0; -} + struct r300_context* r300 = r300_context(pipe); -static uint32_t translate_stencil_op(int s_op) { - switch (s_op) { - case PIPE_STENCIL_OP_KEEP: - return R300_ZS_KEEP; - case PIPE_STENCIL_OP_ZERO: - return R300_ZS_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return R300_ZS_REPLACE; - case PIPE_STENCIL_OP_INCR: - return R300_ZS_INCR; - case PIPE_STENCIL_OP_DECR: - return R300_ZS_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return R300_ZS_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return R300_ZS_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return R300_ZS_INVERT; - default: - debug_printf("r300: Unknown stencil op %d", s_op); - break; + /* This entire chunk of code seems ever-so-slightly baked. + * It's as if I've got pipe_buffer* matryoshkas... */ + if (buffer && buffer->buffer && buffer->buffer->size) { + void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + memcpy(r300->shader_constants[shader].constants, + map, buffer->buffer->size); + pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); + + r300->shader_constants[shader].user_count = + buffer->buffer->size / (sizeof(float) * 4); + } else { + r300->shader_constants[shader].user_count = 0; } - return 0; -} -static uint32_t translate_alpha_function(int alpha_func) { - switch (alpha_func) { - case PIPE_FUNC_NEVER: - return R300_FG_ALPHA_FUNC_NEVER; - case PIPE_FUNC_LESS: - return R300_FG_ALPHA_FUNC_LESS; - case PIPE_FUNC_EQUAL: - return R300_FG_ALPHA_FUNC_EQUAL; - case PIPE_FUNC_LEQUAL: - return R300_FG_ALPHA_FUNC_LE; - case PIPE_FUNC_GREATER: - return R300_FG_ALPHA_FUNC_GREATER; - case PIPE_FUNC_NOTEQUAL: - return R300_FG_ALPHA_FUNC_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return R300_FG_ALPHA_FUNC_GE; - case PIPE_FUNC_ALWAYS: - return R300_FG_ALPHA_FUNC_ALWAYS; - default: - debug_printf("r300: Unknown alpha function %d", alpha_func); - break; - } - return 0; + r300->dirty_state |= R300_NEW_CONSTANTS; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -309,7 +171,7 @@ static void* } dsa->z_stencil_control |= - (translate_depth_stencil_function(state->depth.func) << + (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); } @@ -317,14 +179,14 @@ static void* if (state->stencil[0].enabled) { dsa->z_buffer_control |= R300_STENCIL_ENABLE; dsa->z_stencil_control |= - (translate_depth_stencil_function(state->stencil[0].func) << - R300_S_FRONT_FUNC_SHIFT) | - (translate_stencil_op(state->stencil[0].fail_op) << - R300_S_FRONT_SFAIL_OP_SHIFT) | - (translate_stencil_op(state->stencil[0].zpass_op) << - R300_S_FRONT_ZPASS_OP_SHIFT) | - (translate_stencil_op(state->stencil[0].zfail_op) << - R300_S_FRONT_ZFAIL_OP_SHIFT); + (r300_translate_depth_stencil_function(state->stencil[0].func) << + R300_S_FRONT_FUNC_SHIFT) | + (r300_translate_stencil_op(state->stencil[0].fail_op) << + R300_S_FRONT_SFAIL_OP_SHIFT) | + (r300_translate_stencil_op(state->stencil[0].zpass_op) << + R300_S_FRONT_ZPASS_OP_SHIFT) | + (r300_translate_stencil_op(state->stencil[0].zfail_op) << + R300_S_FRONT_ZFAIL_OP_SHIFT); dsa->stencil_ref_mask = (state->stencil[0].ref_value) | (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) | @@ -333,14 +195,14 @@ static void* if (state->stencil[1].enabled) { dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK; dsa->z_stencil_control |= - (translate_depth_stencil_function(state->stencil[1].func) << - R300_S_BACK_FUNC_SHIFT) | - (translate_stencil_op(state->stencil[1].fail_op) << - R300_S_BACK_SFAIL_OP_SHIFT) | - (translate_stencil_op(state->stencil[1].zpass_op) << - R300_S_BACK_ZPASS_OP_SHIFT) | - (translate_stencil_op(state->stencil[1].zfail_op) << - R300_S_BACK_ZFAIL_OP_SHIFT); + (r300_translate_depth_stencil_function(state->stencil[1].func) << + R300_S_BACK_FUNC_SHIFT) | + (r300_translate_stencil_op(state->stencil[1].fail_op) << + R300_S_BACK_SFAIL_OP_SHIFT) | + (r300_translate_stencil_op(state->stencil[1].zpass_op) << + R300_S_BACK_ZPASS_OP_SHIFT) | + (r300_translate_stencil_op(state->stencil[1].zfail_op) << + R300_S_BACK_ZFAIL_OP_SHIFT); dsa->stencil_ref_bf = (state->stencil[1].ref_value) | (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | @@ -350,7 +212,8 @@ static void* /* Alpha test setup. */ if (state->alpha.enabled) { - dsa->alpha_function = translate_alpha_function(state->alpha.func) | + dsa->alpha_function = + r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, 0, 1023); @@ -415,6 +278,8 @@ static void* r300_create_fs_state(struct pipe_context* pipe, /* Copy state directly into shader. */ fs->state = *shader; + tgsi_scan_shader(shader->tokens, &fs->info); + return (void*)fs; } @@ -424,14 +289,18 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) struct r300_context* r300 = r300_context(pipe); struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader; - if (!fs->translated) { + if (fs == NULL) { + r300->fs = NULL; + return; + } else if (!fs->translated) { if (r300_screen(r300->context.screen)->caps->is_r500) { - r500_translate_shader(r300, fs); + r500_translate_fragment_shader(r300, (struct r500_fragment_shader*)fs); } else { - r300_translate_shader(r300, fs); + r300_translate_fragment_shader(r300, (struct r300_fragment_shader*)fs); } } + fs->translated = TRUE; r300->fs = fs; r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; @@ -449,10 +318,6 @@ static void r300_set_polygon_stipple(struct pipe_context* pipe, /* XXX */ } -static INLINE int pack_float_16_6x(float f) { - return ((int)(f * 6.0) & 0xffff); -} - /* Create a new rasterizer state based on the CSO rasterizer state. * * This is a very large chunk of state, and covers most of the graphics @@ -472,6 +337,12 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); + rs->point_minmax = + ((int)(state->point_size_min * 6.0) << + R300_GA_POINT_MINMAX_MIN_SHIFT) | + ((int)(state->point_size_max * 6.0) << + R300_GA_POINT_MINMAX_MAX_SHIFT); + rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; @@ -504,20 +375,28 @@ static void* r300_create_rs_state(struct pipe_context* pipe, if (rs->polygon_offset_enable) { rs->depth_offset_front = rs->depth_offset_back = - pack_float_32(state->offset_units); + fui(state->offset_units); rs->depth_scale_front = rs->depth_scale_back = - pack_float_32(state->offset_scale); + fui(state->offset_scale); } if (state->line_stipple_enable) { rs->line_stipple_config = R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | - (pack_float_32((float)state->line_stipple_factor) & + (fui((float)state->line_stipple_factor) & R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); /* XXX this might need to be scaled up */ rs->line_stipple_value = state->line_stipple_pattern; } + if (state->flatshade) { + rs->color_control = R300_SHADE_MODEL_FLAT; + } else { + rs->color_control = R300_SHADE_MODEL_SMOOTH; + } + + rs->rs = *state; + return (void*)rs; } @@ -525,8 +404,11 @@ static void* r300_create_rs_state(struct pipe_context* pipe, static void r300_bind_rs_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_rs_state* rs = (struct r300_rs_state*)state; + + draw_set_rasterizer_state(r300->draw, &rs->rs); - r300->rs_state = (struct r300_rs_state*)state; + r300->rs_state = rs; r300->dirty_state |= R300_NEW_RASTERIZER; } @@ -536,83 +418,6 @@ static void r300_delete_rs_state(struct pipe_context* pipe, void* state) FREE(state); } -static uint32_t translate_wrap(int wrap) { - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - return R300_TX_REPEAT; - case PIPE_TEX_WRAP_CLAMP: - return R300_TX_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return R300_TX_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return R300_TX_CLAMP_TO_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return R300_TX_REPEAT | R300_TX_MIRRORED; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return R300_TX_CLAMP | R300_TX_MIRRORED; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; - default: - debug_printf("r300: Unknown texture wrap %d", wrap); - return 0; - } -} - -static uint32_t translate_tex_filters(int min, int mag, int mip) { - uint32_t retval = 0; - switch (min) { - case PIPE_TEX_FILTER_NEAREST: - retval |= R300_TX_MIN_FILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: - retval |= R300_TX_MIN_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - default: - debug_printf("r300: Unknown texture filter %d", min); - break; - } - switch (mag) { - case PIPE_TEX_FILTER_NEAREST: - retval |= R300_TX_MAG_FILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: - retval |= R300_TX_MAG_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - default: - debug_printf("r300: Unknown texture filter %d", mag); - break; - } - switch (mip) { - case PIPE_TEX_MIPFILTER_NONE: - retval |= R300_TX_MIN_FILTER_MIP_NONE; - case PIPE_TEX_MIPFILTER_NEAREST: - retval |= R300_TX_MIN_FILTER_MIP_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: - retval |= R300_TX_MIN_FILTER_MIP_LINEAR; - default: - debug_printf("r300: Unknown texture filter %d", mip); - break; - } - - return retval; -} - -static uint32_t anisotropy(float max_aniso) { - if (max_aniso >= 16.0f) { - return R300_TX_MAX_ANISO_16_TO_1; - } else if (max_aniso >= 8.0f) { - return R300_TX_MAX_ANISO_8_TO_1; - } else if (max_aniso >= 4.0f) { - return R300_TX_MAX_ANISO_4_TO_1; - } else if (max_aniso >= 2.0f) { - return R300_TX_MAX_ANISO_2_TO_1; - } else { - return R300_TX_MAX_ANISO_1_TO_1; - } -} - static void* r300_create_sampler_state(struct pipe_context* pipe, const struct pipe_sampler_state* state) @@ -622,19 +427,19 @@ static void* int lod_bias; sampler->filter0 |= - (translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | - (translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | - (translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); + (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | + (r300_translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | + (r300_translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); - sampler->filter0 |= translate_tex_filters(state->min_img_filter, - state->mag_img_filter, - state->min_mip_filter); + sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, + state->mag_img_filter, + state->min_mip_filter); lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; - sampler->filter1 |= anisotropy(state->max_anisotropy); + sampler->filter1 |= r300_anisotropy(state->max_anisotropy); util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &sampler->border_color); @@ -710,20 +515,12 @@ static void r300_set_scissor_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); draw_flush(r300->draw); - uint32_t left, top, right, bottom; - - /* So, a bit of info. The scissors are offset by R300_SCISSORS_OFFSET in - * both directions for all values, and can only be 13 bits wide. Why? - * We may never know. */ - left = (state->minx + R300_SCISSORS_OFFSET) & 0x1fff; - top = (state->miny + R300_SCISSORS_OFFSET) & 0x1fff; - right = (state->maxx + R300_SCISSORS_OFFSET) & 0x1fff; - bottom = (state->maxy + R300_SCISSORS_OFFSET) & 0x1fff; - - r300->scissor_state->scissor_top_left = (left << R300_SCISSORS_X_SHIFT) | - (top << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); r300->scissor_state->scissor_bottom_right = - (right << R300_SCISSORS_X_SHIFT) | (bottom << R300_SCISSORS_Y_SHIFT); + (state->maxx << R300_SCISSORS_X_SHIFT) | + (state->maxy << R300_SCISSORS_Y_SHIFT); r300->dirty_state |= R300_NEW_SCISSOR; } @@ -732,8 +529,26 @@ static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); - /* XXX handing this off to Draw for now */ - draw_set_viewport_state(r300->draw, state); + + r300->viewport_state->xscale = state->scale[0]; + r300->viewport_state->yscale = state->scale[1]; + r300->viewport_state->zscale = state->scale[2]; + + r300->viewport_state->xoffset = state->translate[0]; + r300->viewport_state->yoffset = state->translate[1]; + r300->viewport_state->zoffset = state->translate[2]; + + r300->viewport_state->vte_control = 0; + if (r300_screen(r300->context.screen)->caps->has_tcl) { + /* Do the transform in HW. */ + r300->viewport_state->vte_control |= + R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | + R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA | + R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA; + } else { + /* Have Draw do the actual transform. */ + draw_set_viewport_state(r300->draw, state); + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -741,7 +556,12 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - /* XXX Draw */ + + memcpy(r300->vertex_buffers, buffers, + sizeof(struct pipe_vertex_buffer) * count); + + r300->vertex_buffer_count = count; + draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c new file mode 100644 index 00000000000..d761a0302f0 --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -0,0 +1,299 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_state_derived.h" + +/* r300_state_derived: Various bits of state which are dependent upon + * currently bound CSO data. */ + +/* Update the vertex_info struct in our r300_context. + * + * The vertex_info struct describes the post-TCL format of vertices. It is + * required for Draw when doing SW TCL, and also for describing the + * dreaded RS block on R300 chipsets. */ +/* XXX this function should be able to handle vert shaders as well as draw */ +static void r300_update_vertex_layout(struct r300_context* r300) +{ + struct r300_vertex_format vformat; + struct vertex_info vinfo; + boolean pos = FALSE, psize = FALSE, fog = FALSE; + int i, texs = 0, cols = 0; + int tab[16]; + + struct tgsi_shader_info* info = &r300->fs->info; + + memset(&vinfo, 0, sizeof(vinfo)); + for (i = 0; i < 16; i++) { + tab[i] = -1; + } + + assert(info->num_inputs <= 16); + + for (i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + pos = TRUE; + tab[i] = 0; + break; + case TGSI_SEMANTIC_COLOR: + tab[i] = 2 + cols++; + break; + case TGSI_SEMANTIC_PSIZE: + psize = TRUE; + tab[i] = 1; + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + /* Fall through... */ + case TGSI_SEMANTIC_GENERIC: + tab[i] = 6 + texs++; + break; + default: + debug_printf("r300: Unknown vertex input %d\n", + info->input_semantic_name[i]); + break; + } + } + + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ + + vinfo.hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ + + if (!pos) { + debug_printf("r300: Forcing vertex position attribute emit...\n"); + /* Make room for the position attribute + * at the beginning of the tab. */ + for (i = 15; i > 0; i--) { + tab[i] = tab[i-1]; + } + tab[0] = 0; + + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_POS, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + } else { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + } + vinfo.hwfmt[1] |= R300_INPUT_CNTL_POS; + vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (psize) { + draw_emit_vertex_attr(&vinfo, EMIT_1F_PSIZE, INTERP_POS, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); + vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } + + for (i = 0; i < cols; i++) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); + vinfo.hwfmt[1] |= R300_INPUT_CNTL_COLOR; + vinfo.hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); + } + + for (i = 0; i < texs; i++) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + vinfo.hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); + vinfo.hwfmt[3] |= (4 << (3 * i)); + } + + if (fog) { + i++; + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + vinfo.hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); + vinfo.hwfmt[3] |= (4 << (3 * i)); + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&r300->vertex_info, &vinfo, sizeof(struct vertex_info))) { + uint32_t temp; + debug_printf("attrib count: %d, fp input count: %d\n", + vinfo.num_attribs, info->num_inputs); + for (i = 0; i < vinfo.num_attribs; i++) { + debug_printf("attrib: offset %d, interp %d, size %d," + " tab %d\n", vinfo.attrib[i].src_index, + vinfo.attrib[i].interp_mode, vinfo.attrib[i].emit, + tab[i]); + } + + for (i = 0; i < vinfo.num_attribs; i++) { + /* Make sure we have a proper destination for our attribute */ + assert(tab[i] != -1); + + temp = translate_vertex_data_type(vinfo.attrib[i].emit) | + (tab[i] << R300_DST_VEC_LOC_SHIFT); + if (i & 1) { + r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp << 16; + } else { + r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0xffff0000; + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp; + } + + r300->vertex_info.vap_prog_stream_cntl_ext[i >> 1] |= + (R300_VAP_SWIZZLE_XYZW << (i & 1 ? 16 : 0)); + } + /* Set the last vector. */ + i--; + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << + (i & 1 ? 16 : 0)); + + memcpy(r300->vertex_info.tab, tab, sizeof(tab)); + memcpy(&r300->vertex_info, &vinfo, sizeof(struct vertex_info)); + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } +} + +/* Set up the RS block. This is the part of the chipset that actually does + * the rasterization of vertices into fragments. This is also the part of the + * chipset that locks up if any part of it is even slightly wrong. */ +static void r300_update_rs_block(struct r300_context* r300) +{ + struct r300_rs_block* rs = r300->rs_block; + struct vertex_info* vinfo = &r300->vertex_info.vinfo; + int* tab = r300->vertex_info.tab; + int col_count = 0, fp_offset = 0, i, memory_pos, tex_count = 0; + + memset(rs, 0, sizeof(struct r300_rs_block)); + + if (r300_screen(r300->context.screen)->caps->is_r500) { + for (i = 0; i < vinfo->num_attribs; i++) { + assert(tab[vinfo->attrib[i].src_index] != -1); + memory_pos = tab[vinfo->attrib[i].src_index] * 4; + switch (vinfo->attrib[i].interp_mode) { + case INTERP_LINEAR: + rs->ip[col_count] |= + R500_RS_COL_PTR(memory_pos) | + R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + col_count++; + break; + case INTERP_PERSPECTIVE: + rs->ip[tex_count] |= + R500_RS_SEL_S(memory_pos) | + R500_RS_SEL_T(memory_pos + 1) | + R500_RS_SEL_R(memory_pos + 2) | + R500_RS_SEL_Q(memory_pos + 3); + tex_count++; + break; + default: + break; + } + } + + if (col_count == 0) { + rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + } + + /* Set up at least one texture pointer or RS will not be happy. */ + if (tex_count == 0) { + rs->ip[0] |= + R500_RS_SEL_S(R500_RS_IP_PTR_K0) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1); + } + + for (i = 0; i < tex_count; i++) { + rs->inst[i] |= R500_RS_INST_TEX_ID(i) | R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_TEX_ADDR(fp_offset); + fp_offset++; + } + + for (i = 0; i < col_count; i++) { + rs->inst[i] |= R500_RS_INST_COL_ID(i) | R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); + fp_offset++; + } + } else { + for (i = 0; i < vinfo->num_attribs; i++) { + memory_pos = tab[vinfo->attrib[i].src_index] * 4; + assert(tab[vinfo->attrib[i].src_index] != -1); + switch (vinfo->attrib[i].interp_mode) { + case INTERP_LINEAR: + rs->ip[col_count] |= + R300_RS_COL_PTR(memory_pos) | + R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + col_count++; + break; + case INTERP_PERSPECTIVE: + rs->ip[tex_count] |= + R300_RS_TEX_PTR(memory_pos) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_C2) | + R300_RS_SEL_Q(R300_RS_SEL_C3); + tex_count++; + break; + default: + break; + } + } + + if (col_count == 0) { + rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + } + + if (tex_count == 0) { + rs->ip[0] |= + R300_RS_SEL_S(R300_RS_SEL_K0) | + R300_RS_SEL_T(R300_RS_SEL_K0) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1); + } + + for (i = 0; i < tex_count; i++) { + rs->inst[i] |= R300_RS_INST_TEX_ID(i) | R300_RS_INST_TEX_CN_WRITE | + R300_RS_INST_TEX_ADDR(fp_offset); + fp_offset++; + } + + for (i = 0; i < col_count; i++) { + rs->inst[i] |= R300_RS_INST_COL_ID(i) | R300_RS_INST_COL_CN_WRITE | + R300_RS_INST_COL_ADDR(fp_offset); + fp_offset++; + } + } + + rs->count = (tex_count * 4) | (col_count << R300_IC_COUNT_SHIFT) | + R300_HIRES_EN; + + rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); +} + +void r300_update_derived_state(struct r300_context* r300) +{ + if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_update_vertex_layout(r300); + } + + if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { + r300_update_rs_block(r300); + } +} diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h new file mode 100644 index 00000000000..63ae8eb8d08 --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -0,0 +1,34 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_STATE_DERIVED_H +#define R300_STATE_DERIVED_H + +#include "draw/draw_vertex.h" + +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_state_inlines.h" + +void r300_update_derived_state(struct r300_context* r300); + +#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h new file mode 100644 index 00000000000..fd92c71756b --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -0,0 +1,384 @@ +/* + * Copyright 2009 Joakim Sindholt <opensource@zhasha.com> + * Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_STATE_INLINES_H +#define R300_STATE_INLINES_H + +#include "pipe/p_format.h" + +#include "r300_reg.h" + +/* Some maths. These should probably find their way to u_math, if needed. */ + +static INLINE int pack_float_16_6x(float f) { + return ((int)(f * 6.0) & 0xffff); +} + +/* Blend state. */ + +static INLINE uint32_t r300_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return R300_COMB_FCN_ADD_CLAMP; + case PIPE_BLEND_SUBTRACT: + return R300_COMB_FCN_SUB_CLAMP; + case PIPE_BLEND_REVERSE_SUBTRACT: + return R300_COMB_FCN_RSUB_CLAMP; + case PIPE_BLEND_MIN: + return R300_COMB_FCN_MIN; + case PIPE_BLEND_MAX: + return R300_COMB_FCN_MAX; + default: + debug_printf("r300: Unknown blend function %d\n", blend_func); + break; + } + return 0; +} + +/* XXX we can also offer the D3D versions of some of these... */ +static INLINE uint32_t r300_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return R300_BLEND_GL_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return R300_BLEND_GL_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return R300_BLEND_GL_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return R300_BLEND_GL_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return R300_BLEND_GL_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return R300_BLEND_GL_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return R300_BLEND_GL_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return R300_BLEND_GL_CONST_ALPHA; + /* XXX WTF are these? + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: */ + case PIPE_BLENDFACTOR_ZERO: + return R300_BLEND_GL_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return R300_BLEND_GL_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; + /* XXX see above + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ + default: + debug_printf("r300: Unknown blend factor %d\n", blend_fact); + break; + } + return 0; +} + +/* DSA state. */ + +static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func) +{ + switch (zs_func) { + case PIPE_FUNC_NEVER: + return R300_ZS_NEVER; + case PIPE_FUNC_LESS: + return R300_ZS_LESS; + case PIPE_FUNC_EQUAL: + return R300_ZS_EQUAL; + case PIPE_FUNC_LEQUAL: + return R300_ZS_LEQUAL; + case PIPE_FUNC_GREATER: + return R300_ZS_GREATER; + case PIPE_FUNC_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return R300_ZS_GEQUAL; + case PIPE_FUNC_ALWAYS: + return R300_ZS_ALWAYS; + default: + debug_printf("r300: Unknown depth/stencil function %d\n", + zs_func); + break; + } + return 0; +} + +static INLINE uint32_t r300_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return R300_ZS_KEEP; + case PIPE_STENCIL_OP_ZERO: + return R300_ZS_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return R300_ZS_REPLACE; + case PIPE_STENCIL_OP_INCR: + return R300_ZS_INCR; + case PIPE_STENCIL_OP_DECR: + return R300_ZS_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return R300_ZS_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return R300_ZS_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return R300_ZS_INVERT; + default: + debug_printf("r300: Unknown stencil op %d", s_op); + break; + } + return 0; +} + +static INLINE uint32_t r300_translate_alpha_function(int alpha_func) +{ + switch (alpha_func) { + case PIPE_FUNC_NEVER: + return R300_FG_ALPHA_FUNC_NEVER; + case PIPE_FUNC_LESS: + return R300_FG_ALPHA_FUNC_LESS; + case PIPE_FUNC_EQUAL: + return R300_FG_ALPHA_FUNC_EQUAL; + case PIPE_FUNC_LEQUAL: + return R300_FG_ALPHA_FUNC_LE; + case PIPE_FUNC_GREATER: + return R300_FG_ALPHA_FUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return R300_FG_ALPHA_FUNC_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return R300_FG_ALPHA_FUNC_GE; + case PIPE_FUNC_ALWAYS: + return R300_FG_ALPHA_FUNC_ALWAYS; + default: + debug_printf("r300: Unknown alpha function %d", alpha_func); + break; + } + return 0; +} + +/* Texture sampler state. */ + +static INLINE uint32_t r300_translate_wrap(int wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return R300_TX_REPEAT; + case PIPE_TEX_WRAP_CLAMP: + return R300_TX_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return R300_TX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return R300_TX_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return R300_TX_REPEAT | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return R300_TX_CLAMP | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + default: + debug_printf("r300: Unknown texture wrap %d", wrap); + return 0; + } +} + +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +{ + uint32_t retval = 0; + switch (min) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + retval |= R300_TX_MIN_FILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + retval |= R300_TX_MIN_FILTER_ANISO; + break; + default: + debug_printf("r300: Unknown texture filter %d\n", min); + break; + } + switch (mag) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MAG_FILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + retval |= R300_TX_MAG_FILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + retval |= R300_TX_MAG_FILTER_ANISO; + break; + default: + debug_printf("r300: Unknown texture filter %d\n", mag); + break; + } + switch (mip) { + case PIPE_TEX_MIPFILTER_NONE: + retval |= R300_TX_MIN_FILTER_MIP_NONE; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + retval |= R300_TX_MIN_FILTER_MIP_LINEAR; + break; + default: + debug_printf("r300: Unknown texture filter %d\n", mip); + break; + } + + return retval; +} + +static INLINE uint32_t r300_anisotropy(float max_aniso) +{ + if (max_aniso >= 16.0f) { + return R300_TX_MAX_ANISO_16_TO_1; + } else if (max_aniso >= 8.0f) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (max_aniso >= 4.0f) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (max_aniso >= 2.0f) { + return R300_TX_MAX_ANISO_2_TO_1; + } else { + return R300_TX_MAX_ANISO_1_TO_1; + } +} + +/* Buffer formats. */ + +static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers */ + case PIPE_FORMAT_I8_UNORM: + return R300_COLOR_FORMAT_I8; + /* 16-bit buffers */ + case PIPE_FORMAT_R5G6B5_UNORM: + return R300_COLOR_FORMAT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return R300_COLOR_FORMAT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return R300_COLOR_FORMAT_ARGB4444; + /* 32-bit buffers */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_COLOR_FORMAT_ARGB8888; + /* XXX Not in pipe_format + case PIPE_FORMAT_A32R32G32B32: + return R300_COLOR_FORMAT_ARGB32323232; + case PIPE_FORMAT_A16R16G16B16: + return R300_COLOR_FORMAT_ARGB16161616; */ + /* XXX Not in pipe_format + case PIPE_FORMAT_A10R10G10B10_UNORM: + return R500_COLOR_FORMAT_ARGB10101010; + case PIPE_FORMAT_A2R10G10B10_UNORM: + return R500_COLOR_FORMAT_ARGB2101010; + case PIPE_FORMAT_I10_UNORM: + return R500_COLOR_FORMAT_I10; */ + default: + debug_printf("r300: Implementation error: " \ + "Got unsupported color format %s in %s\n", + pf_name(format), __FUNCTION__); + break; + } + return 0; +} + +static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) +{ + switch (format) { + /* 16-bit depth, no stencil */ + case PIPE_FORMAT_Z16_UNORM: + return R300_DEPTHFORMAT_16BIT_INT_Z; + /* 24-bit depth, 8-bit stencil */ + case PIPE_FORMAT_Z24S8_UNORM: + return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + default: + debug_printf("r300: Implementation error: " \ + "Got unsupported ZS format %s in %s\n", + pf_name(format), __FUNCTION__); + break; + } + return 0; +} + +/* Non-CSO state. (For now.) */ + +static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) +{ + switch (pipe_count) { + case 1: + return R300_GB_TILE_PIPE_COUNT_RV300; + break; + case 2: + return R300_GB_TILE_PIPE_COUNT_R300; + break; + case 3: + return R300_GB_TILE_PIPE_COUNT_R420_3P; + break; + case 4: + return R300_GB_TILE_PIPE_COUNT_R420; + break; + } + return 0; +} + +static INLINE uint32_t translate_vertex_data_type(int type) { + switch (type) { + case EMIT_1F: + case EMIT_1F_PSIZE: + return R300_DATA_TYPE_FLOAT_1; + break; + case EMIT_2F: + return R300_DATA_TYPE_FLOAT_2; + break; + case EMIT_3F: + return R300_DATA_TYPE_FLOAT_3; + break; + case EMIT_4F: + return R300_DATA_TYPE_FLOAT_4; + break; + case EMIT_4UB: + return R300_DATA_TYPE_BYTE; + break; + default: + debug_printf("r300: Implementation error: " + "Bad vertex data type!\n"); + assert(0); + break; + } + + return 0; +} + +#endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c new file mode 100644 index 00000000000..3d51a8e65d2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -0,0 +1,203 @@ +/* + * Copyright 2009 Joakim Sindholt <opensource@zhasha.com> + * Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_state_invariant.h" + +/* Calculate and emit invariant state. This is data that the 3D engine + * will probably want at the beginning of every CS, but it's not currently + * handled by any CSO setup, and in addition it doesn't really change much. + * + * Note that eventually this should be empty, but it's useful for development + * and general unduplication of code. */ +void r300_emit_invariant_state(struct r300_context* r300) +{ + struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; + CS_LOCALS(r300); + + BEGIN_CS(24 + (caps->has_tcl ? 2: 0)); + + /* Various GB enables */ + OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | + R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); + /* Subpixel multisampling for AA */ + OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); + OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); + /* GB tile config and pipe setup */ + OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_DISABLE | + r300_translate_gb_pipes(caps->num_frag_pipes)); + /* Source of fog depth */ + OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); + /* AA enable */ + OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); + /* GA errata fixes. */ + if (caps->is_r500) { + OUT_CS_REG(R300_GA_ENHANCE, + R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | + R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE | + R500_GA_ENHANCE_REG_READWRITE_ENABLE | + R500_GA_ENHANCE_REG_NOSTALL_ENABLE); + } else { + OUT_CS_REG(R300_GA_ENHANCE, + R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | + R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); + } + + /* Fog block. */ + OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); + OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); + OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); + OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); + OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); + + /* TCL-only stuff */ + if (caps->has_tcl) { + /* Amount of time to wait for vertex fetches in PVS */ + OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); + } + + END_CS; + + /* XXX unsorted stuff from surface_fill */ + BEGIN_CS(99 + (caps->has_tcl ? 26 : 0)); + /* Flush PVS. */ + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + + OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); + /* Max and min vertex index clamp. */ + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); + /* XXX endian */ + if (caps->has_tcl) { + OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); + OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE | + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + } else { + OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP | + R300_VAP_TCL_BYPASS); + } + /* XXX magic number not in r300_reg */ + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); + /* XXX point tex stuffing */ + OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); + OUT_CS_32F(0.0); + OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1); + OUT_CS_32F(1.0); + OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | + (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); + /* XXX this big chunk should be refactored into rs_state */ + OUT_CS_REG(R300_GA_LINE_S0, 0x00000000); + OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000); + OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); + OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); + OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); + OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); + OUT_CS_REG(R300_GA_OFFSET, 0x00000000); + OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); + OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); + OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); + OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); + OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); + OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); + OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + OUT_CS_REG(R300_ZB_FORMAT, 0x00000002); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003); + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); + OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); + if (caps->has_tcl) { + OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, + (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | + R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); + } else { + OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, + (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | + R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); + } + OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, + (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) | + (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT)); + OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); + OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); + OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); + /* Vertex size. */ + OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); + OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); + OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); + OUT_CS_REG(R300_TX_ENABLE, 0x0); + + /* XXX */ + OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); + + OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); + OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); + OUT_CS(R300_US_OUT_FMT_UNUSED); + OUT_CS(R300_US_OUT_FMT_UNUSED); + OUT_CS(R300_US_OUT_FMT_UNUSED); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0); + /* XXX these magic numbers should be explained when + * this becomes a cached state object */ + if (caps->has_tcl) { + OUT_CS_REG(R300_VAP_CNTL, 0xA | + (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (0xB << R300_VF_MAX_VTX_NUM_SHIFT) | + (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); + OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, 0x00100000); + OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x00000000); + OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, 0x00000001); + /* XXX translate these back into normal instructions */ + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1); + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 8); + OUT_CS(0x00F00203); + OUT_CS(0x00D10001); + OUT_CS(0x01248001); + OUT_CS(0x00000000); + OUT_CS(0x00F02203); + OUT_CS(0x00D10021); + OUT_CS(0x01248021); + OUT_CS(0x00000000); + } else { + OUT_CS_REG(R300_VAP_CNTL, 0xA | + (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (0x5 << R300_VF_MAX_VTX_NUM_SHIFT) | + (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); + } + END_CS; +} diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h new file mode 100644 index 00000000000..8204bf9588b --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_STATE_INVARIANT_H +#define R300_STATE_INVARIANT_H + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_reg.h" +#include "r300_state_inlines.h" + +void r300_emit_invariant_state(struct r300_context* r300); + +#endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c index e87172128fa..20b83bd15b1 100644 --- a/src/gallium/drivers/r300/r300_state_shader.c +++ b/src/gallium/drivers/r300/r300_state_shader.c @@ -22,12 +22,557 @@ #include "r300_state_shader.h" -void r300_translate_shader(struct r300_context* r300, - struct r300_fragment_shader* fs) +static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs) { + struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader; + fs->shader.stack_size = pt->shader.stack_size; + fs->alu_instruction_count = pt->alu_instruction_count; + fs->tex_instruction_count = pt->tex_instruction_count; + fs->indirections = pt->indirections; + fs->instructions[0] = pt->instructions[0]; } -void r500_translate_shader(struct r300_context* r300, - struct r500_fragment_shader* fs) +static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs) { + struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader; + fs->shader.stack_size = pt->shader.stack_size; + fs->instruction_count = pt->instruction_count; + fs->instructions[0] = pt->instructions[0]; +} + +static void r300_fs_declare(struct r300_fs_asm* assembler, + struct tgsi_full_declaration* decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_COLOR: + assembler->color_count++; + break; + case TGSI_SEMANTIC_GENERIC: + assembler->tex_count++; + break; + default: + debug_printf("r300: fs: Bad semantic declaration %d\n", + decl->Semantic.SemanticName); + break; + } + break; + case TGSI_FILE_OUTPUT: + case TGSI_FILE_CONSTANT: + break; + case TGSI_FILE_TEMPORARY: + assembler->temp_count++; + break; + default: + debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File); + break; + } + + assembler->temp_offset = assembler->color_count + assembler->tex_count; +} + +static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler, + struct tgsi_src_register* src) +{ + switch (src->File) { + case TGSI_FILE_NULL: + return 0; + case TGSI_FILE_INPUT: + /* XXX may be wrong */ + return src->Index; + break; + case TGSI_FILE_TEMPORARY: + return src->Index + assembler->temp_offset; + break; + case TGSI_FILE_IMMEDIATE: + return (src->Index + assembler->imm_offset) | (1 << 8); + break; + case TGSI_FILE_CONSTANT: + /* XXX magic */ + return src->Index | (1 << 8); + break; + default: + debug_printf("r300: fs: Unimplemented src %d\n", src->File); + break; + } + return 0; +} + +static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, + struct tgsi_dst_register* dst) +{ + switch (dst->File) { + case TGSI_FILE_NULL: + /* This happens during KIL instructions. */ + return 0; + break; + case TGSI_FILE_OUTPUT: + return 0; + break; + case TGSI_FILE_TEMPORARY: + return dst->Index + assembler->temp_offset; + break; + default: + debug_printf("r300: fs: Unimplemented dst %d\n", dst->File); + break; + } + return 0; +} + +static INLINE unsigned r500_fix_swiz(unsigned s) +{ + /* For historical reasons, the swizzle values x, y, z, w, and 0 are + * equivalent to the actual machine code, but 1 is not. Thus, we just + * adjust it a bit... */ + if (s == TGSI_EXTSWIZZLE_ONE) { + return R500_SWIZZLE_ONE; + } else { + return s; + } +} + +static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg) +{ + if (reg->SrcRegister.Extended) { + return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | + (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | + (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | + (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); + } else { + return reg->SrcRegister.SwizzleX | + (reg->SrcRegister.SwizzleY << 3) | + (reg->SrcRegister.SwizzleZ << 6) | + (reg->SrcRegister.SwizzleW << 9); + } +} + +static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg) +{ + return reg->SrcRegister.SwizzleX | + (reg->SrcRegister.SwizzleY << 2) | + (reg->SrcRegister.SwizzleZ << 4) | + (reg->SrcRegister.SwizzleW << 6); +} + +static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg) +{ + /* Only the first 9 bits... */ + return (r500_rgba_swiz(reg) & 0x1ff) | + (reg->SrcRegister.Negate ? (1 << 9) : 0) | + (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); +} + +static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg) +{ + /* Only the last 3 bits... */ + return (r500_rgba_swiz(reg) >> 9) | + (reg->SrcRegister.Negate ? (1 << 9) : 0) | + (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); +} + +static INLINE uint32_t r500_rgba_op(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + return R500_ALU_RGBA_OP_SOP; + case TGSI_OPCODE_FRC: + return R500_ALU_RGBA_OP_FRC; + case TGSI_OPCODE_DP3: + return R500_ALU_RGBA_OP_DP3; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + return R500_ALU_RGBA_OP_DP4; + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + return R500_ALU_RGBA_OP_CMP; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_SUB: + return R500_ALU_RGBA_OP_MAD; + default: + return 0; + } +} + +static INLINE uint32_t r500_alpha_op(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_EX2: + return R500_ALPHA_OP_EX2; + case TGSI_OPCODE_LG2: + return R500_ALPHA_OP_LN2; + case TGSI_OPCODE_RCP: + return R500_ALPHA_OP_RCP; + case TGSI_OPCODE_RSQ: + return R500_ALPHA_OP_RSQ; + case TGSI_OPCODE_FRC: + return R500_ALPHA_OP_FRC; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + return R500_ALPHA_OP_DP; + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + return R500_ALPHA_OP_CMP; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_SUB: + return R500_ALPHA_OP_MAD; + default: + return 0; + } +} + +static INLINE uint32_t r500_tex_op(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_KIL: + return R500_TEX_INST_TEXKILL; + case TGSI_OPCODE_TEX: + return R500_TEX_INST_LD; + case TGSI_OPCODE_TXB: + return R500_TEX_INST_LODBIAS; + case TGSI_OPCODE_TXP: + return R500_TEX_INST_PROJ; + default: + return 0; + } +} + +/* Setup an ALU operation. */ +static INLINE void r500_emit_alu(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_dst_register* dst) +{ + int i = fs->instruction_count; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + fs->instructions[i].inst0 = R500_INST_TYPE_OUT | + R500_ALU_OMASK(dst->DstRegister.WriteMask); + } else { + fs->instructions[i].inst0 = R500_INST_TYPE_ALU | + R500_ALU_WMASK(dst->DstRegister.WriteMask); + } + + fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; + + fs->instructions[i].inst4 = + R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); + fs->instructions[i].inst5 = + R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); +} + +static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_src_register* src, + struct tgsi_full_dst_register* dst, + unsigned op, + unsigned count) +{ + int i = fs->instruction_count; + + r500_emit_alu(fs, assembler, dst); + + switch (count) { + case 3: + fs->instructions[i].inst1 = + R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); + fs->instructions[i].inst2 = + R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); + fs->instructions[i].inst5 |= + R500_ALU_RGBA_SEL_C_SRC2 | + R500_SWIZ_RGBA_C(r500_rgb_swiz(&src[2])) | + R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | + R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src[2])); + case 2: + fs->instructions[i].inst1 |= + R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); + fs->instructions[i].inst2 |= + R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); + fs->instructions[i].inst3 = + R500_ALU_RGB_SEL_B_SRC1 | + R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1])); + fs->instructions[i].inst4 |= + R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) | + R500_ALPHA_SEL_B_SRC1; + case 1: + case 0: + default: + fs->instructions[i].inst1 |= + R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); + fs->instructions[i].inst2 |= + R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); + fs->instructions[i].inst3 |= + R500_ALU_RGB_SEL_A_SRC0 | + R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0])); + fs->instructions[i].inst4 |= + R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0])) | + R500_ALPHA_SEL_A_SRC0; + break; + } + + fs->instructions[i].inst4 |= r500_alpha_op(op); + fs->instructions[i].inst5 |= r500_rgba_op(op); + + fs->instruction_count++; +} + +static INLINE void r500_emit_tex(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_src_register* src, + struct tgsi_full_dst_register* dst, + uint32_t op) +{ + int i = fs->instruction_count; + + fs->instructions[i].inst0 = R500_INST_TYPE_TEX | + R500_TEX_WMASK(dst->DstRegister.WriteMask) | + R500_INST_TEX_SEM_WAIT; + fs->instructions[i].inst1 = R500_TEX_ID(0) | + R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED | + r500_tex_op(op); + fs->instructions[i].inst2 = + R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) | + R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) | + R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) | + R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + fs->instructions[i].inst2 |= + R500_TEX_DST_ADDR(assembler->temp_count + + assembler->temp_offset); + + fs->instruction_count++; + + /* Setup and emit a MOV. */ + src[0].SrcRegister.Index = assembler->temp_count; + src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + + src[1] = src[0]; + src[2] = r500_constant_zero; + r500_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); + } else { + fs->instruction_count++; + } +} + +static void r500_fs_instruction(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_instruction* inst) +{ + int i; + /* Switch between opcodes. When possible, prefer using the official + * AMD/ATI names for opcodes, please, as it facilitates using the + * documentation. */ + switch (inst->Instruction.Opcode) { + /* The simple scalar ops. */ + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + /* Copy red swizzle to alpha for src0 */ + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; + inst->FullSrcRegisters[0].SrcRegister.SwizzleW = + inst->FullSrcRegisters[0].SrcRegister.SwizzleX; + /* Fall through */ + case TGSI_OPCODE_FRC: + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); + break; + + /* The dot products. */ + case TGSI_OPCODE_DPH: + /* Set alpha swizzle to one for src0 */ + if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { + inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = + inst->FullSrcRegisters[0].SrcRegister.SwizzleX; + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = + inst->FullSrcRegisters[0].SrcRegister.SwizzleY; + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = + inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; + } + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = + TGSI_EXTSWIZZLE_ONE; + /* Fall through */ + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2); + break; + + /* Simple three-source operations. */ + case TGSI_OPCODE_CMP: + /* Swap src0 and src2 */ + inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2]; + inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0]; + inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3]; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + + /* The MAD variants. */ + case TGSI_OPCODE_SUB: + /* Just like ADD, but flip the negation on src1 first */ + inst->FullSrcRegisters[1].SrcRegister.Negate = + !inst->FullSrcRegisters[1].SrcRegister.Negate; + /* Fall through */ + case TGSI_OPCODE_ADD: + /* Force src0 to one, move all registers over */ + inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1]; + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst->FullSrcRegisters[0] = r500_constant_one; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + case TGSI_OPCODE_MUL: + /* Force our src2 to zero */ + inst->FullSrcRegisters[2] = r500_constant_zero; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + case TGSI_OPCODE_MAD: + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + + /* The MOV variants. */ + case TGSI_OPCODE_ABS: + /* Set absolute value modifiers. */ + inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE; + /* Fall through */ + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + /* src0 -> src1 and src2 forced to zero */ + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst->FullSrcRegisters[2] = r500_constant_zero; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + + /* The texture instruction set. */ + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + r500_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], + &inst->FullDstRegisters[0], inst->Instruction.Opcode); + break; + + /* This is the end. My only friend, the end. */ + case TGSI_OPCODE_END: + break; + default: + debug_printf("r300: fs: Bad opcode %d\n", + inst->Instruction.Opcode); + break; + } + + /* Clamp, if saturation flags are set. */ + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { + fs->instructions[fs->instruction_count - 1].inst0 |= + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + } +} + +static void r500_fs_finalize(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler) +{ + fs->shader.stack_size = assembler->temp_count + assembler->temp_offset; + + /* XXX should this just go with OPCODE_END? */ + fs->instructions[fs->instruction_count - 1].inst0 |= + R500_INST_LAST; +} + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + struct tgsi_parse_context parser; + + tgsi_parse_init(&parser, fs->shader.state.tokens); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + } + + r300_copy_passthrough_shader(fs); +} + +void r500_translate_fragment_shader(struct r300_context* r300, + struct r500_fragment_shader* fs) +{ + struct tgsi_parse_context parser; + int i; + struct r300_constant_buffer* consts = + &r300->shader_constants[PIPE_SHADER_FRAGMENT]; + + struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm); + if (assembler == NULL) { + return; + } + /* Setup starting offset for immediates. */ + assembler->imm_offset = consts->user_count; + + tgsi_parse_init(&parser, fs->shader.state.tokens); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + + /* This is seriously the lamest way to create fragment programs ever. + * I blame TGSI. */ + switch (parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Allocated registers sitting at the beginning + * of the program. */ + r300_fs_declare(assembler, &parser.FullToken.FullDeclaration); + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + debug_printf("r300: Emitting immediate to constant buffer, " + "position %d\n", + assembler->imm_offset + assembler->imm_count); + /* I am not amused by the length of these. */ + for (i = 0; i < 4; i++) { + consts->constants[assembler->imm_offset + + assembler->imm_count][i] = + parser.FullToken.FullImmediate.u.ImmediateFloat32[i] + .Float; + } + assembler->imm_count++; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + r500_fs_instruction(fs, assembler, + &parser.FullToken.FullInstruction); + break; + } + + } + + debug_printf("r300: %d texs and %d colors, first free reg is %d\n", + assembler->tex_count, assembler->color_count, + assembler->tex_count + assembler->color_count); + + consts->count = consts->user_count + assembler->imm_count; + debug_printf("r300: %d total constants, " + "%d from user and %d from immediates\n", consts->count, + consts->user_count, assembler->imm_count); + r500_fs_finalize(fs, assembler); + + tgsi_dump(fs->shader.state.tokens); + r500_fs_dump(fs); + + tgsi_parse_free(&parser); + FREE(assembler); } diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h index a20bd4276c6..06c0bb73789 100644 --- a/src/gallium/drivers/r300/r300_state_shader.h +++ b/src/gallium/drivers/r300/r300_state_shader.h @@ -23,13 +23,142 @@ #ifndef R300_STATE_SHADER_H #define R300_STATE_SHADER_H +#include "tgsi/tgsi_parse.h" + #include "r300_context.h" +#include "r300_debug.h" +#include "r300_reg.h" #include "r300_screen.h" -void r300_translate_shader(struct r300_context* r300, +/* XXX this all should find its way back to r300_reg */ +/* Swizzle tools */ +#define R500_SWIZZLE_ZERO 4 +#define R500_SWIZZLE_HALF 5 +#define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) +#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) +#define R500_SWIZ_MOD_NEG 1 +#define R500_SWIZ_MOD_ABS 2 +#define R500_SWIZ_MOD_NEG_ABS 3 +/* Swizzles for inst2 */ +#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) +#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) +/* Swizzles for inst3 */ +#define R500_SWIZ_RGB_A(x) ((x) << 2) +#define R500_SWIZ_RGB_B(x) ((x) << 15) +/* Swizzles for inst4 */ +#define R500_SWIZ_ALPHA_A(x) ((x) << 14) +#define R500_SWIZ_ALPHA_B(x) ((x) << 21) +/* Swizzle for inst5 */ +#define R500_SWIZ_RGBA_C(x) ((x) << 14) +#define R500_SWIZ_ALPHA_C(x) ((x) << 27) +/* Writemasks */ +#define R500_TEX_WMASK(x) ((x) << 11) +#define R500_ALU_WMASK(x) ((x) << 11) +#define R500_ALU_OMASK(x) ((x) << 15) + +/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're + * not using enough of it. */ +static const struct tgsi_full_src_register r500_constant_zero = { + .SrcRegister.Extended = TRUE, + .SrcRegister.File = TGSI_FILE_NULL, + .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, +}; + +static const struct tgsi_full_src_register r500_constant_one = { + .SrcRegister.Extended = TRUE, + .SrcRegister.File = TGSI_FILE_NULL, + .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE, +}; + +/* Temporary struct used to hold assembly state while putting together + * fragment programs. */ +struct r300_fs_asm { + /* Pipe context. */ + struct r300_context* r300; + /* Number of colors. */ + unsigned color_count; + /* Number of texcoords. */ + unsigned tex_count; + /* Offset for temporary registers. Inputs and temporaries have no + * distinguishing markings, so inputs start at 0 and the first usable + * temporary register is after all inputs. */ + unsigned temp_offset; + /* Number of requested temporary registers. */ + unsigned temp_count; + /* Offset for immediate constants. Neither R300 nor R500 can do four + * inline constants per source, so instead we copy immediates into the + * constant buffer. */ + unsigned imm_offset; + /* Number of immediate constants. */ + unsigned imm_count; +}; + +void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); -void r500_translate_shader(struct r300_context* r300, +void r500_translate_fragment_shader(struct r300_context* r300, struct r500_fragment_shader* fs); +static const struct r300_fragment_shader r300_passthrough_fragment_shader = { + /* XXX This is the emission code. TODO: decode + OUT_CS_REG(R300_US_CONFIG, 0); + OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); + OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); +*/ + .alu_instruction_count = 1, + .tex_instruction_count = 0, + .indirections = 1, + .shader.stack_size = 2, + + .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_ONE) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + R300_ALU_OUTC_MAD, + .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_ONE) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + R300_ALU_OUTA_MAD, + .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; + +static const struct r500_fragment_shader r500_passthrough_fragment_shader = { + .shader.stack_size = 0, + .instruction_count = 1, + .instructions[0].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .instructions[0].inst1 = + R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, + .instructions[0].inst2 = + R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, + .instructions[0].inst3 = + R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, + .instructions[0].inst4 = + R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, + .instructions[0].inst5 = + R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0, +}; + #endif /* R300_STATE_SHADER_H */ diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 1e1f96a7f96..2cc0677e52c 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,321 +33,161 @@ static void r300_surface_fill(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); CS_LOCALS(r300); - struct r300_capabilities* caps = ((struct r300_screen*)pipe->screen)->caps; + struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; int i; - float r, g, b, a; + float r, g, b, a, depth; + unsigned pixpitch = tex->stride / tex->tex.block.size; + r = (float)((color >> 16) & 0xff) / 255.0f; g = (float)((color >> 8) & 0xff) / 255.0f; b = (float)((color >> 0) & 0xff) / 255.0f; debug_printf("r300: Filling surface %p at (%d,%d)," - " dimensions %dx%d (stride %d), color 0x%x\n", - dest, x, y, w, h, dest->stride, color); + " dimensions %dx%d (pixel pitch %d), color 0x%x\n", + dest, x, y, w, h, pixpitch, color); /* Fallback? */ - if (0) { + if (tex->tex.format != PIPE_FORMAT_A8R8G8B8_UNORM) { debug_printf("r300: Falling back on surface clear..."); - void* map = pipe->screen->surface_map(pipe->screen, dest, - PIPE_BUFFER_USAGE_CPU_WRITE); - pipe_fill_rect(map, &dest->block, &dest->stride, x, y, w, h, color); - pipe->screen->surface_unmap(pipe->screen, dest); + util_surface_fill(pipe, dest, x, y, w, h, color); return; } -BEGIN_CS((caps->is_r500) ? 309 : 280); -R300_PACIFY; -OUT_CS_REG(R300_TX_INVALTAGS, 0x0); -R300_PACIFY; -/* Flush PVS. */ -OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); - -OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); -/* Vertex size. */ -OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); -/* Max and min vertex index clamp. */ -OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); -OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); -/* XXX endian */ -OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); -OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x0); -/* XXX magic number not in r300_reg */ -OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); -OUT_CS_REG(R300_VAP_CLIP_CNTL, 0x0); -OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); -OUT_CS_32F(1.0); -OUT_CS_32F(1.0); -OUT_CS_32F(1.0); -OUT_CS_32F(1.0); -/* XXX is this too long? */ -OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xFFFF); -OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | - R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); -/* XXX more magic numbers */ -OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); -OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); -/* XXX why doesn't classic Mesa write the number of pipes, too? */ -OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16); -OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); -OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); -/* XXX point tex stuffing */ -OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); -OUT_CS_32F(0.0); -OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1); -OUT_CS_32F(1.0); -OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | - (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); -/* XXX should this be related to the actual point size? */ -OUT_CS_REG(R300_GA_POINT_MINMAX, 0x6 | - (0x1800 << R300_GA_POINT_MINMAX_MAX_SHIFT)); -/* XXX this big chunk should be refactored into rs_state */ -OUT_CS_REG(R300_GA_LINE_CNTL, 0x00030006); -OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, 0x3BAAAAAB); -OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, 0x00000000); -OUT_CS_REG(R300_GA_LINE_S0, 0x00000000); -OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000); -OUT_CS_REG(R300_GA_ENHANCE, 0x00000002); -OUT_CS_REG(R300_GA_COLOR_CONTROL, 0x0003AAAA); -OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); -OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); -OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); -OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); -OUT_CS_REG(R300_GA_OFFSET, 0x00000000); -OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); -OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); -OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); -OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000); -OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000); -OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_SCALE, 0x00000000); -OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000); -OUT_CS_REG(R300_SU_POLY_OFFSET_ENABLE, 0x00000000); -OUT_CS_REG(R300_SU_CULL_MODE, 0x00000000); -OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); -OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); -OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); -OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); -OUT_CS_REG(R300_SC_SCREENDOOR, 0x00FFFFFF); -OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000002); -OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); -OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); -OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); -OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); -OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); -OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); -OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); - -/* XXX: Oh the wonderful unknown */ -OUT_CS_REG_SEQ(0x4E54, 8); -for (i = 0; i < 8; i++) - OUT_CS(0x00000000); -OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); -OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); -OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); -OUT_CS_REG(R300_ZB_FORMAT, 0x00000002); -OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003); -OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); -OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); -OUT_CS_REG(0x4F30, 0x00000000); -OUT_CS_REG(0x4F34, 0x00000000); -OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); -OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); -R300_PACIFY; -if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); -} else { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); -} -OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); -OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xF688F688); -OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); -OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); -OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); -OUT_CS_REG(R300_VAP_VTX_SIZE, 0x00000008); -OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); -OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); -OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); -OUT_CS_REG(R300_TX_ENABLE, 0x0); -/* XXX viewport setup */ -OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); -OUT_CS_32F(1.0); -OUT_CS_32F((float)x); -OUT_CS_32F(1.0); -OUT_CS_32F((float)y); -OUT_CS_32F(1.0); -OUT_CS_32F(0.0); - -if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); -} - -OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) | - ((w * 6) << R300_POINTSIZE_X_SHIFT)); - -/* XXX RS block and fp setup */ -if (caps->is_r500) { - OUT_CS_REG_SEQ(R500_RS_IP_0, 8); - for (i = 0; i < 8; i++) { - /* I like the operator macros more than the shift macros... */ - OUT_CS((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + r300_emit_blend_state(r300, &blend_clear_state); + r300_emit_blend_color_state(r300, &blend_color_clear_state); + r300_emit_dsa_state(r300, &dsa_clear_state); + r300_emit_rs_state(r300, &rs_clear_state); + + /* Fragment shader setup */ + if (caps->is_r500) { + r500_emit_fragment_shader(r300, &r500_passthrough_fragment_shader); + r300_emit_rs_block_state(r300, &r500_rs_block_clear_state); + } else { + r300_emit_fragment_shader(r300, &r300_passthrough_fragment_shader); + r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); } - /* XXX */ - OUT_CS_REG_SEQ(R300_RS_COUNT, 2); - OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_CS(0x0); - OUT_CS_REG(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); - OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_CS_REG(R500_US_PIXSIZE, 0x00000000); - OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(1)); - OUT_CS_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | - R500_US_CODE_RANGE_SIZE(1)); - OUT_CS_REG(R500_US_CODE_OFFSET, R500_US_CODE_OFFSET_ADDR(0)); - R300_PACIFY; - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, - 0 | R500_GA_US_VECTOR_INDEX_TYPE_INSTR); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G | R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A); - OUT_CS_REG(R500_GA_US_VECTOR_DATA, - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0); -} else { - OUT_CS_REG_SEQ(R300_RS_IP_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(R300_RS_SEL_T(R300_RS_SEL_K0) | - R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1)); - } - /* XXX */ - OUT_CS_REG_SEQ(R300_RS_COUNT, 2); - OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_CS(1); - OUT_CS_REG(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); - - /* XXX magic numbers */ - OUT_CS_REG(R300_US_CONFIG, 0); - OUT_CS_REG(R300_US_PIXSIZE, 2); - OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); - OUT_CS_REG(R300_US_ALU_RGB_INST_0, 0x50A80); - OUT_CS_REG(R300_US_ALU_RGB_ADDR_0, 0x1C000000); - OUT_CS_REG(R300_US_ALU_ALPHA_INST_0, 0x40889); - OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1000000); - OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0); -} -/* XXX these magic numbers should be explained when - * this becomes a cached state object */ -if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_CNTL, 0xA | - (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (0xB << R300_VF_MAX_VTX_NUM_SHIFT) | - (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); - OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, 0x00100000); - OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x00000000); - OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, 0x00000001); - R300_PACIFY; - /* XXX translate these back into normal instructions */ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1); - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF00203); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10001); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248001); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF02203); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10021); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248021); - OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0); -} else { - OUT_CS_REG(R300_VAP_CNTL, 0xA | - (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (0x5 << R300_VF_MAX_VTX_NUM_SHIFT) | - (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); + BEGIN_CS(36); + + /* Viewport setup */ + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(1.0); + OUT_CS_32F((float)x); + OUT_CS_32F(1.0); + OUT_CS_32F((float)y); + OUT_CS_32F(1.0); + OUT_CS_32F(0.0); + + /* Pixel scissors */ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + + /* The size of the point we're about to draw, in sixths of pixels */ + OUT_CS_REG(R300_GA_POINT_SIZE, + ((h * 6) & R300_POINTSIZE_Y_MASK) | + ((w * 6) << R300_POINTSIZE_X_SHIFT)); + + /* Flush colorbuffer and blend caches. */ + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | + r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); + /* XXX Packet3 */ + OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | + (1 << R300_PRIM_NUM_VERTICES_SHIFT)); + OUT_CS_32F(w / 2.0); + OUT_CS_32F(h / 2.0); + /* XXX this should be the depth value to clear to */ + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + OUT_CS_32F(r); + OUT_CS_32F(g); + OUT_CS_32F(b); + OUT_CS_32F(1.0); + + /* XXX figure out why this is 0xA and not 0x2 */ + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); + /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */ + + END_CS; + + r300->dirty_hw++; } -R300_PACIFY; -END_CS; -r300_emit_blend_state(r300, &blend_clear_state); -r300_emit_blend_color_state(r300, &blend_color_clear_state); -r300_emit_dsa_state(r300, &dsa_clear_state); - -BEGIN_CS(36); -R300_PACIFY; -/* Flush colorbuffer and blend caches. */ -OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); -OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - -OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); -OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -/* XXX this should not be so rigid and it still doesn't work right */ -OUT_CS_REG(R300_RB3D_COLORPITCH0, (dest->stride >> 2) | R300_COLOR_FORMAT_ARGB8888); -OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); -/* XXX Packet3 */ -OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); -OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | -(1 << R300_PRIM_NUM_VERTICES_SHIFT)); -OUT_CS_32F(w / 2.0); -OUT_CS_32F(h / 2.0); -/* XXX this should be the depth value to clear to */ -OUT_CS_32F(1.0); -OUT_CS_32F(1.0); -OUT_CS_32F(r); -OUT_CS_32F(g); -OUT_CS_32F(b); -OUT_CS_32F(1.0); - -/* XXX figure out why this is 0xA and not 0x2 */ -OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); -/* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */ -R300_PACIFY; - -END_CS; -FLUSH_CS; - - r300->dirty_state = R300_NEW_KITCHEN_SINK; +static void r300_surface_copy(struct pipe_context* pipe, + struct pipe_surface* dest, + unsigned destx, unsigned desty, + struct pipe_surface* src, + unsigned srcx, unsigned srcy, + unsigned w, unsigned h) +{ + struct r300_context* r300 = r300_context(pipe); + CS_LOCALS(r300); + struct r300_texture* srctex = (struct r300_texture*)src->texture; + struct r300_texture* desttex = (struct r300_texture*)dest->texture; + + unsigned pixpitch = srctex->stride / srctex->tex.block.size; + debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," + " dimensions %dx%d (pixel pitch %d)\n", + src, srcx, srcy, dest, destx, desty, w, h, pixpitch); + + if (TRUE) { + debug_printf("r300: Falling back on surface_copy\n"); + return util_surface_copy(pipe, FALSE, dest, destx, desty, src, + srcx, srcy, w, h); + } +#if 0 + BEGIN_CS(); + OUT_CS_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT,(RADEON_DEFAULT_SC_RIGHT_MAX | + RADEON_DEFAULT_SC_BOTTOM_MAX)); + OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, (RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (datatype << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP[rop].rop | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS)); + OUT_CS_REG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); + OUT_CS_REG(RADEON_DP_BRUSH_BKGD_CLR, 0x0); + OUT_CS_REG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); + OUT_CS_REG(RADEON_DP_SRC_BKGD_CLR, 0x0); + OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); + OUT_ACCEL_REG(RADEON_DP_CNTL, ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | + (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)); +); + + OUT_CS_REG_SEQ(RADEON_DST_PITCH_OFFSET, 1); + OUT_CS_RELOC(desttex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_CS_REG_SEQ(RADEON_SRC_PITCH_OFFSET, 1); + OUT_CS_RELOC(srctex->buffer, 0, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + + OUT_CS_REG(RADEON_SRC_Y_X, (srcy << 16) | srcx); + OUT_CS_REG(RADEON_DST_Y_X, (desty << 16) | destx); + OUT_CS_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); + OUT_CS_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); + OUT_CS_REG(RADEON_WAIT_UNTIL, + RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); + END_CS; +#endif } void r300_init_surface_functions(struct r300_context* r300) { r300->context.surface_fill = r300_surface_fill; + r300->context.surface_copy = r300_surface_copy; } diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h index e1d53116a1f..b75b3ab84cf 100644 --- a/src/gallium/drivers/r300/r300_surface.h +++ b/src/gallium/drivers/r300/r300_surface.h @@ -31,6 +31,8 @@ #include "r300_context.h" #include "r300_cs.h" #include "r300_emit.h" +#include "r300_state_shader.h" +#include "r300_state_inlines.h" const struct r300_blend_state blend_clear_state = { .blend_control = 0x0, @@ -55,4 +57,38 @@ const struct r300_dsa_state dsa_clear_state = { .stencil_ref_bf = 0x0, }; +const struct r300_rs_state rs_clear_state = { + .point_minmax = 0x36000006, + .line_control = 0x00030006, + .depth_scale_front = 0x0, + .depth_offset_front = 0x0, + .depth_scale_back = 0x0, + .depth_offset_back = 0x0, + .polygon_offset_enable = 0x0, + .cull_mode = 0x0, + .line_stipple_config = 0x3BAAAAAB, + .line_stipple_value = 0x0, + .color_control = R300_SHADE_MODEL_FLAT, +}; + +const struct r300_rs_block r300_rs_block_clear_state = { + .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | + R500_RS_SEL_T(R300_RS_SEL_K0) | + R500_RS_SEL_R(R300_RS_SEL_K0) | + R500_RS_SEL_Q(R300_RS_SEL_K1), + .inst[0] = R300_RS_INST_COL_CN_WRITE, + .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, + .inst_count = 0, +}; + +const struct r300_rs_block r500_rs_block_clear_state = { + .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1), + .inst[0] = R500_RS_INST_COL_CN_WRITE, + .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, + .inst_count = 0, +}; + #endif /* R300_SURFACE_H */ diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index f6e98d23e98..3db09514c69 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -21,109 +21,315 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "draw/draw_pipe.h" +#include "draw/draw_vbuf.h" #include "util/u_memory.h" #include "r300_cs.h" #include "r300_context.h" #include "r300_reg.h" +#include "r300_state_derived.h" -/* r300_swtcl_emit: Primitive vertex emission using an immediate - * vertex buffer and no HW TCL. */ +/* r300_swtcl_emit: Vertex and index buffer primitive emission. No HW TCL. */ -struct swtcl_stage { +struct r300_swtcl_render { /* Parent class */ - struct draw_stage draw; - + struct vbuf_render base; + + /* Pipe context */ struct r300_context* r300; + + /* Vertex information */ + size_t vertex_size; + unsigned prim; + unsigned hwprim; + + /* VBO */ + struct pipe_buffer* vbo; + size_t vbo_size; + size_t vbo_offset; + void* vbo_map; + size_t vbo_alloc_size; + size_t vbo_max_used; }; -static INLINE struct swtcl_stage* swtcl_stage(struct draw_stage* draw) { - return (struct swtcl_stage*)draw; +static INLINE struct r300_swtcl_render* +r300_swtcl_render(struct vbuf_render* render) +{ + return (struct r300_swtcl_render*)render; } -static void r300_emit_vertex(struct r300_context* r300, - const struct vertex_header* vertex) +static const struct vertex_info* +r300_swtcl_render_get_vertex_info(struct vbuf_render* render) { - /* XXX */ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + + r300_update_derived_state(r300); + + return &r300->vertex_info; } -static INLINE void r300_emit_prim(struct draw_stage* draw, - struct prim_header* prim, - unsigned hwprim, - unsigned count) +static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render, + ushort vertex_size, + ushort count) { - struct r300_context* r300 = swtcl_stage(draw)->r300; - CS_LOCALS(r300); - int i; + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + size_t size = (size_t)vertex_size * (size_t)count; - r300_emit_dirty_state(r300); + if (r300render->vbo) { + pipe_buffer_reference(&r300render->vbo, NULL); + } + + r300render->vbo_size = MAX2(size, r300render->vbo_alloc_size); + r300render->vbo_offset = 0; + r300render->vbo = pipe_buffer_create(screen, + 64, + PIPE_BUFFER_USAGE_VERTEX, + r300render->vbo_size); - /* XXX should be count * vtx size */ - BEGIN_CS(2 + count + 6); - OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, count)); - OUT_CS(hwprim | R300_PRIM_WALK_RING | - (count << R300_PRIM_NUM_VERTICES_SHIFT)); + r300render->vertex_size = vertex_size; - for (i = 0; i < count; i++) { - r300_emit_vertex(r300, prim->v[i]); + if (r300render->vbo) { + return TRUE; + } else { + return FALSE; } - R300_PACIFY; - END_CS; } -/* Just as an aside... - * - * Radeons can do many more primitives: - * - Line strip - * - Triangle fan - * - Triangle strip - * - Line loop - * - Quads - * - Quad strip - * - Polygons - * - * The following were just the only ones in Draw. */ +static void* r300_swtcl_render_map_vertices(struct vbuf_render* render) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct pipe_screen* screen = r300render->r300->context.screen; -static void r300_emit_point(struct draw_stage* draw, struct prim_header* prim) + r300render->vbo_map = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (unsigned char*)r300render->vbo_map + r300render->vbo_offset; +} + +static void r300_swtcl_render_unmap_vertices(struct vbuf_render* render, + ushort min, + ushort max) { - r300_emit_prim(draw, prim, R300_PRIM_TYPE_POINT, 1); + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct pipe_screen* screen = r300render->r300->context.screen; + + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); + + pipe_buffer_unmap(screen, r300render->vbo); } -static void r300_emit_line(struct draw_stage* draw, struct prim_header* prim) +static void r300_swtcl_render_release_vertices(struct vbuf_render* render) { - r300_emit_prim(draw, prim, R300_PRIM_TYPE_LINE, 2); + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + + pipe_buffer_reference(&r300render->vbo, NULL); } -static void r300_emit_tri(struct draw_stage* draw, struct prim_header* prim) +static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render, + unsigned prim) { - r300_emit_prim(draw, prim, R300_PRIM_TYPE_TRI_LIST, 3); + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + r300render->prim = prim; + + switch (prim) { + case PIPE_PRIM_POINTS: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS; + break; + case PIPE_PRIM_LINES: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES; + break; + case PIPE_PRIM_LINE_LOOP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case PIPE_PRIM_LINE_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; + case PIPE_PRIM_TRIANGLES: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case PIPE_PRIM_TRIANGLE_FAN: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; + case PIPE_PRIM_QUADS: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS; + break; + case PIPE_PRIM_QUAD_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; + case PIPE_PRIM_POLYGON: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON; + break; + default: + return FALSE; + break; + } + + return TRUE; } -static void r300_swtcl_flush(struct draw_stage* draw, unsigned flags) +static void prepare_render(struct r300_swtcl_render* render, unsigned count) { + struct r300_context* r300 = render->r300; + int i; + + CS_LOCALS(r300); + + /* Make sure that all possible state is emitted. */ + r300_emit_dirty_state(r300); + + debug_printf("r300: Preparing vertex buffer %p for render, " + "vertex size %d, vertex count %d\n", render->vbo, + r300->vertex_info.vinfo.size, count); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * OFFSET [0] + * VBPNTR [relocated BO] + */ + BEGIN_CS(7); + OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3)); + OUT_CS(1); + OUT_CS(r300->vertex_info.vinfo.size | + (r300->vertex_info.vinfo.size << 8)); + OUT_CS(render->vbo_offset); + OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; } -static void r300_reset_stipple(struct draw_stage* draw) +static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, + unsigned start, + unsigned count) { - /* XXX */ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + + CS_LOCALS(r300); + + r300render->vbo_offset = start; + + prepare_render(r300render, count); + + debug_printf("r300: Doing vbuf render, count %d\n", count); + + BEGIN_CS(2); + OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0)); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + r300render->hwprim); + END_CS; +} + +static void r300_swtcl_render_draw(struct vbuf_render* render, + const ushort* indices, + uint count) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* index_buffer; + void* index_map; + + CS_LOCALS(r300); + + count /= 4; + + prepare_render(r300render, count); + + /* Send our indices into an index buffer. */ + index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, + count * 4); + if (!index_buffer) { + return; + } + + index_map = pipe_buffer_map(screen, index_buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(index_map, indices, count * 4); + pipe_buffer_unmap(screen, index_buffer); + + debug_printf("r300: Doing indexbuf render, count %d\n", count); +#if 0 + BEGIN_CS(5); + OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + + OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2)); + OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); + OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +#endif +} + +static void r300_swtcl_render_destroy(struct vbuf_render* render) +{ + FREE(render); } -static void r300_swtcl_destroy(struct draw_stage* draw) +static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) { - FREE(draw); + struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render); + struct pipe_screen* screen = r300->context.screen; + + r300render->r300 = r300; + + /* XXX find real numbers plz */ + r300render->base.max_vertex_buffer_bytes = 128 * 1024; + r300render->base.max_indices = 16 * 1024; + + r300render->base.get_vertex_info = r300_swtcl_render_get_vertex_info; + r300render->base.allocate_vertices = r300_swtcl_render_allocate_vertices; + r300render->base.map_vertices = r300_swtcl_render_map_vertices; + r300render->base.unmap_vertices = r300_swtcl_render_unmap_vertices; + r300render->base.set_primitive = r300_swtcl_render_set_primitive; + r300render->base.draw = r300_swtcl_render_draw; + r300render->base.draw_arrays = r300_swtcl_render_draw_arrays; + r300render->base.release_vertices = r300_swtcl_render_release_vertices; + r300render->base.destroy = r300_swtcl_render_destroy; + + /* XXX bonghits ahead + r300render->vbo_alloc_size = 128 * 4096; + r300render->vbo_size = r300render->vbo_alloc_size; + r300render->vbo_offset = 0; + r300render->vbo = pipe_buffer_create(screen, + 64, + PIPE_BUFFER_USAGE_VERTEX, + r300render->vbo_size); + r300render->vbo_map = pipe_buffer_map(screen, + r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, r300render->vbo); */ + + return &r300render->base; } struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300) { - struct swtcl_stage* swtcl = CALLOC_STRUCT(swtcl_stage); + struct vbuf_render* render; + struct draw_stage* stage; + + render = r300_swtcl_render_create(r300); + + if (!render) { + return NULL; + } + + stage = draw_vbuf_stage(r300->draw, render); + + if (!stage) { + render->destroy(render); + return NULL; + } - swtcl->r300 = r300; - swtcl->draw.point = r300_emit_point; - swtcl->draw.line = r300_emit_line; - swtcl->draw.tri = r300_emit_tri; - swtcl->draw.flush = r300_swtcl_flush; - swtcl->draw.reset_stipple_counter = r300_reset_stipple; - swtcl->draw.destroy = r300_swtcl_destroy; + draw_set_render(r300->draw, render); - return &swtcl->draw; + return stage; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ff812c09f80..6cdea3d2854 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -27,12 +27,37 @@ static int minify(int i) return MAX2(1, i >> 1); } +static void r300_setup_texture_state(struct r300_texture* tex, + unsigned width, + unsigned height, + unsigned pitch) +{ + struct r300_texture_state* state = &tex->state; + + state->format0 = R300_TX_WIDTH((width - 1) & 0x7ff) | + R300_TX_HEIGHT((height - 1) & 0x7ff) | R300_TX_PITCH_EN; + + /* XXX */ + state->format1 = R300_TX_FORMAT_A8R8G8B8; + + state->format2 = pitch - 1; + + /* XXX + if (width > 2048) { + state->pitch |= R300_TXWIDTH_11; + } + if (height > 2048) { + state->pitch |= R300_TXHEIGHT_11; + } */ +} + static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; int stride, size, offset; + int i; - for (int i = 0; i <= base->last_level; i++) { + for (i = 0; i <= base->last_level; i++) { if (i > 0) { base->width[i] = minify(base->width[i-1]); base->height[i] = minify(base->height[i-1]); @@ -43,13 +68,16 @@ static void r300_setup_miptree(struct r300_texture* tex) base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]); /* Radeons enjoy things in multiples of 32. */ - /* XXX NPOT -> 64, not 32 */ + /* XXX this can be 32 when POT */ stride = (base->nblocksx[i] * base->block.size + 63) & ~63; size = stride * base->nblocksy[i] * base->depth[i]; - /* XXX 64 for NPOT */ tex->offset[i] = (tex->size + 63) & ~63; tex->size = tex->offset[i] + size; + + if (i == 0) { + tex->stride = stride; + } } } @@ -67,12 +95,16 @@ static struct pipe_texture* } tex->tex = *template; - tex->tex.refcount = 1; + pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; r300_setup_miptree(tex); - tex->buffer = screen->buffer_create(screen, 63, + /* XXX */ + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], + tex->tex.width[0]); + + tex->buffer = screen->buffer_create(screen, 64, PIPE_BUFFER_USAGE_PIXEL, tex->size); @@ -84,24 +116,13 @@ static struct pipe_texture* return (struct pipe_texture*)tex; } -static void r300_texture_release(struct pipe_screen* screen, - struct pipe_texture** texture) +static void r300_texture_destroy(struct pipe_texture* texture) { - if (!*texture) { - return; - } - - (*texture)->refcount--; - - if ((*texture)->refcount <= 0) { - struct r300_texture* tex = (struct r300_texture*)*texture; + struct r300_texture* tex = (struct r300_texture*)texture; - pipe_buffer_reference(screen, &tex->buffer, NULL); + pipe_buffer_reference(&tex->buffer, NULL); - FREE(tex); - } - - *texture = NULL; + FREE(tex); } static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, @@ -119,17 +140,11 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, offset = tex->offset[level]; if (surface) { - surface->refcount = 1; + pipe_reference_init(&surface->reference, 1); pipe_texture_reference(&surface->texture, texture); surface->format = texture->format; surface->width = texture->width[level]; surface->height = texture->height[level]; - surface->block = texture->block; - surface->nblocksx = texture->nblocksx[level]; - surface->nblocksy = texture->nblocksy[level]; - /* XXX save the actual stride instead plz kthnxbai */ - surface->stride = - (texture->nblocksx[level] * texture->block.size + 63) & ~63; surface->offset = offset; surface->usage = flags; surface->status = PIPE_SURFACE_STATUS_DEFINED; @@ -138,19 +153,10 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, return surface; } -static void r300_tex_surface_release(struct pipe_screen* screen, - struct pipe_surface** surface) +static void r300_tex_surface_destroy(struct pipe_surface* s) { - struct pipe_surface* s = *surface; - - s->refcount--; - - if (s->refcount <= 0) { - pipe_texture_reference(&s->texture, NULL); - FREE(s); - } - - *surface = NULL; + pipe_texture_reference(&s->texture, NULL); + FREE(s); } static struct pipe_texture* @@ -173,12 +179,15 @@ static struct pipe_texture* } tex->tex = *base; - tex->tex.refcount = 1; + pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - /* XXX tex->stride = *stride; */ + tex->stride = *stride; + + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], + tex->stride); - pipe_buffer_reference(screen, &tex->buffer, buffer); + pipe_buffer_reference(&tex->buffer, buffer); return (struct pipe_texture*)tex; } @@ -186,8 +195,26 @@ static struct pipe_texture* void r300_init_screen_texture_functions(struct pipe_screen* screen) { screen->texture_create = r300_texture_create; - screen->texture_release = r300_texture_release; + screen->texture_destroy = r300_texture_destroy; screen->get_tex_surface = r300_get_tex_surface; - screen->tex_surface_release = r300_tex_surface_release; + screen->tex_surface_destroy = r300_tex_surface_destroy; screen->texture_blanket = r300_texture_blanket; } + +boolean r300_get_texture_buffer(struct pipe_texture* texture, + struct pipe_buffer** buffer, + unsigned* stride) +{ + struct r300_texture* tex = (struct r300_texture*)texture; + if (!tex) { + return FALSE; + } + + pipe_buffer_reference(buffer, tex->buffer); + + if (stride) { + *stride = tex->stride; + } + + return TRUE; +} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 7964229a94f..98fb5c9a08f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -28,7 +28,16 @@ #include "util/u_math.h" #include "r300_context.h" +#include "r300_reg.h" void r300_init_screen_texture_functions(struct pipe_screen* screen); +#ifndef R300_WINSYS_H + +boolean r300_get_texture_buffer(struct pipe_texture* texture, + struct pipe_buffer** buffer, + unsigned* stride); + +#endif /* R300_WINSYS_H */ + #endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 5a3a2128927..baa95282c33 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -33,10 +33,16 @@ extern "C" { #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/internal/p_winsys_screen.h" struct radeon_cs; struct r300_winsys { + /* Parent class */ + struct pipe_winsys base; + + /* Opaque Radeon-specific winsys object. */ + void* radeon_winsys; /* PCI ID */ uint32_t pci_id; @@ -47,10 +53,6 @@ struct r300_winsys { /* CS object. This is very much like Intel's batchbuffer. * Fill it full of dwords and relocs and then submit. * Repeat as needed. */ - /* Note: Unlike Mesa's version of this, we don't keep a copy of the CSM - * that was used to create this CS. Is this a good idea? */ - /* Note: The pipe driver doesn't know how to use this. This is purely - * for the winsys. */ struct radeon_cs* cs; /* Check to see if there's room for commands. */ @@ -84,9 +86,12 @@ struct r300_winsys { }; struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct pipe_winsys* winsys, struct r300_winsys* r300_winsys); +boolean r300_get_texture_buffer(struct pipe_texture* texture, + struct pipe_buffer** buffer, + unsigned* stride); + #ifdef __cplusplus } #endif |