diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_render.c')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_render.c | 356 |
1 files changed, 0 insertions, 356 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c deleted file mode 100644 index 14987e3c3a2..00000000000 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ /dev/null @@ -1,356 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <libmisc.h> -#include <spu_mfcio.h> - -#include "spu_main.h" -#include "spu_render.h" -#include "spu_shuffle.h" -#include "spu_tri.h" -#include "spu_tile.h" -#include "cell/common.h" -#include "util/u_memory.h" - - -/** - * Given a rendering command's bounding box (in pixels) compute the - * location of the corresponding screen tile bounding box. - */ -static INLINE void -tile_bounding_box(const struct cell_command_render *render, - uint *txmin, uint *tymin, - uint *box_num_tiles, uint *box_width_tiles) -{ -#if 0 - /* Debug: full-window bounding box */ - uint txmax = spu.fb.width_tiles - 1; - uint tymax = spu.fb.height_tiles - 1; - *txmin = 0; - *tymin = 0; - *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - *box_width_tiles = spu.fb.width_tiles; - (void) render; - (void) txmax; - (void) tymax; -#else - uint txmax, tymax, box_height_tiles; - - *txmin = (uint) render->xmin / TILE_SIZE; - *tymin = (uint) render->ymin / TILE_SIZE; - txmax = (uint) render->xmax / TILE_SIZE; - tymax = (uint) render->ymax / TILE_SIZE; - if (txmax >= spu.fb.width_tiles) - txmax = spu.fb.width_tiles-1; - if (tymax >= spu.fb.height_tiles) - tymax = spu.fb.height_tiles-1; - *box_width_tiles = txmax - *txmin + 1; - box_height_tiles = tymax - *tymin + 1; - *box_num_tiles = *box_width_tiles * box_height_tiles; -#endif -#if 0 - printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, - render->xmin, render->ymin, render->xmax, render->ymax); - printf("SPU %u: tiles: %u, %u .. %u, %u\n", - spu.init.id, *txmin, *tymin, txmax, tymax); - ASSERT(render->xmin <= render->xmax); - ASSERT(render->ymin <= render->ymax); -#endif -} - - -/** Check if the tile at (tx,ty) belongs to this SPU */ -static INLINE boolean -my_tile(uint tx, uint ty) -{ - return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; -} - - -/** - * Start fetching non-clear color/Z tiles from main memory - */ -static INLINE void -get_cz_tiles(uint tx, uint ty) -{ - if (spu.read_depth_stencil) { - if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_GETTING; - } - } - - if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_GETTING; - } -} - - -/** - * Start putting dirty color/Z tiles back to main memory - */ -static INLINE void -put_cz_tiles(uint tx, uint ty) -{ - if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ztile_status = TILE_STATUS_DEFINED; - //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); - } - - if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ctile_status = TILE_STATUS_DEFINED; - //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); - } -} - - -/** - * Wait for 'put' of color/z tiles to complete. - */ -static INLINE void -wait_put_cz_tiles(void) -{ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.read_depth_stencil) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } -} - - -/** - * Render primitives - * \param pos_incr returns value indicating how may words to skip after - * this command in the batch buffer - */ -void -cmd_render(const struct cell_command_render *render, uint *pos_incr) -{ - /* we'll DMA into these buffers */ - PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; - const uint vertex_size = render->vertex_size; /* in bytes */ - /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; - uint index_bytes; - const ubyte *vertices; - const ushort *indexes; - uint i, j; - uint num_tiles; - - D_PRINTF(CELL_DEBUG_CMD, - "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - ASSERT(sizeof(*render) % 4 == 0); - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); - - - /* indexes are right after the render command in the batch buffer */ - indexes = (const ushort *) (render + 1); - index_bytes = ROUNDUP8(render->num_indexes * 2); - *pos_incr = index_bytes / 8 + sizeof(*render) / 8; - - - if (render->inline_verts) { - /* Vertices are after indexes in batch buffer at next 16-byte addr */ - vertices = (const ubyte *) render + (*pos_incr * 8); - vertices = (const ubyte *) align_pointer((void *) vertices, 16); - ASSERT_ALIGN16(vertices); - *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; - } - else { - /* Begin DMA fetch of vertex buffer */ - ubyte *src = spu.init.buffers[render->vertex_buf]; - ubyte *dest = vertex_data; - - /* skip vertex data we won't use */ -#if 01 - src += render->min_index * vertex_size; - dest += render->min_index * vertex_size; - total_vertex_bytes -= render->min_index * vertex_size; -#endif - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT_ALIGN16(dest); - ASSERT_ALIGN16(src); - - mfc_get(dest, /* in vertex_data[] array */ - (unsigned int) src, /* src in main memory */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - } - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); - - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ - - - num_tiles = 0; - - /** - ** loop over tiles, rendering tris - **/ - for (i = 0; i < box_num_tiles; i++) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - if (!my_tile(tx, ty)) - continue; - - num_tiles++; - - spu.cur_ctile_status = spu.ctile_status[ty][tx]; - spu.cur_ztile_status = spu.ztile_status[ty][tx]; - - get_cz_tiles(tx, ty); - - uint drawn = 0; - - const qword vertex_sizes = (qword)spu_splats(vertex_size); - const qword verticess = (qword)spu_splats((uint)vertices); - - ASSERT_ALIGN16(&indexes[0]); - - const uint num_indexes = render->num_indexes; - - /* loop over tris - * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled - * avoiding variable rotates when extracting vertex indices. - */ - for (j = 0; j < num_indexes; j += 24) { - /* Load three vectors, containing 24 ushort indices */ - const qword* lower_qword = (qword*)&indexes[j]; - const qword indices0 = lower_qword[0]; - const qword indices1 = lower_qword[1]; - const qword indices2 = lower_qword[2]; - - /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */ - /* Straightforward rotates for these */ - qword vs0 = indices0; - qword vs1 = si_shlqbyi(indices0, 6); - qword vs3 = si_shlqbyi(indices1, 2); - qword vs4 = si_shlqbyi(indices1, 8); - qword vs6 = si_shlqbyi(indices2, 4); - qword vs7 = si_shlqbyi(indices2, 10); - - /* For tri 2 and 5, the three indices are split across two machine - * words - rotate and combine */ - const qword tmp2a = si_shlqbyi(indices0, 12); - const qword tmp2b = si_rotqmbyi(indices1, 12|16); - qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20))); - - const qword tmp5a = si_shlqbyi(indices1, 14); - const qword tmp5b = si_rotqmbyi(indices2, 14|16); - qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60))); - - /* unpack indices from halfword slots to word slots */ - vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0)); - vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0)); - vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0)); - vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0)); - vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0)); - vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0)); - vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0)); - vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0)); - - /* Calculate address of vertex in vertices[] */ - vs0 = si_mpya(vs0, vertex_sizes, verticess); - vs1 = si_mpya(vs1, vertex_sizes, verticess); - vs2 = si_mpya(vs2, vertex_sizes, verticess); - vs3 = si_mpya(vs3, vertex_sizes, verticess); - vs4 = si_mpya(vs4, vertex_sizes, verticess); - vs5 = si_mpya(vs5, vertex_sizes, verticess); - vs6 = si_mpya(vs6, vertex_sizes, verticess); - vs7 = si_mpya(vs7, vertex_sizes, verticess); - - /* Select the appropriate call based on the number of vertices - * remaining */ - switch(num_indexes - j) { - default: drawn += tri_draw(vs7, tx, ty); - case 21: drawn += tri_draw(vs6, tx, ty); - case 18: drawn += tri_draw(vs5, tx, ty); - case 15: drawn += tri_draw(vs4, tx, ty); - case 12: drawn += tri_draw(vs3, tx, ty); - case 9: drawn += tri_draw(vs2, tx, ty); - case 6: drawn += tri_draw(vs1, tx, ty); - case 3: drawn += tri_draw(vs0, tx, ty); - } - } - - //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); - - /* write color/z tiles back to main framebuffer, if dirtied */ - put_cz_tiles(tx, ty); - - wait_put_cz_tiles(); /* XXX seems unnecessary... */ - - spu.ctile_status[ty][tx] = spu.cur_ctile_status; - spu.ztile_status[ty][tx] = spu.cur_ztile_status; - } - - D_PRINTF(CELL_DEBUG_CMD, - "RENDER done (%u tiles hit)\n", - num_tiles); -} |