/* * Mesa 3-D graphics library * Version: 7.5 * * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "main/glheader.h" #include "main/context.h" #include "main/macros.h" #include "program.h" #include "prog_instruction.h" #include "prog_optimize.h" #include "prog_print.h" #define MAX_LOOP_NESTING 50 static GLboolean dbg = GL_FALSE; /** * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. * \return number of instructions removed */ static GLuint remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) { GLint i, removeEnd = 0, removeCount = 0; GLuint totalRemoved = 0; /* go backward */ for (i = prog->NumInstructions - 1; i >= 0; i--) { if (removeFlags[i]) { totalRemoved++; if (removeCount == 0) { /* begin a run of instructions to remove */ removeEnd = i; removeCount = 1; } else { /* extend the run of instructions to remove */ removeCount++; } } else { /* don't remove this instruction, but check if the preceeding * instructions are to be removed. */ if (removeCount > 0) { GLint removeStart = removeEnd - removeCount + 1; _mesa_delete_instructions(prog, removeStart, removeCount); removeStart = removeCount = 0; /* reset removal info */ } } } return totalRemoved; } /** * Remap register indexes according to map. * \param prog the program to search/replace * \param file the type of register file to search/replace * \param map maps old register indexes to new indexes */ static void replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) { GLuint i; for (i = 0; i < prog->NumInstructions; i++) { struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == file) { GLuint index = inst->SrcReg[j].Index; ASSERT(map[index] >= 0); inst->SrcReg[j].Index = map[index]; } } if (inst->DstReg.File == file) { const GLuint index = inst->DstReg.Index; ASSERT(map[index] >= 0); inst->DstReg.Index = map[index]; } } } /** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. */ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { _mesa_printf("Optimize: Begin register consolidation\n"); } memset(tempUsed, 0, sizeof(tempUsed)); for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { tempMap[i] = -1; } /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { const GLuint index = inst->SrcReg[j].Index; ASSERT(index < MAX_PROGRAM_TEMPS); tempUsed[index] = GL_TRUE; tempMax = MAX2(tempMax, index); break; } } if (inst->DstReg.File == PROGRAM_TEMPORARY) { const GLuint index = inst->DstReg.Index; ASSERT(index < MAX_PROGRAM_TEMPS); tempUsed[index] = GL_TRUE; tempMax = MAX2(tempMax, index); } } /* allocate a new index for each temp that's used */ { GLuint freeTemp = 0; for (i = 0; i <= tempMax; i++) { if (tempUsed[i]) { tempMap[i] = freeTemp++; /*_mesa_printf("replace %u with %u\n", i, tempMap[i]);*/ } } if (freeTemp == tempMax + 1) { /* no consolidation possible */ return; } if (dbg) { _mesa_printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1); } } replace_regs(prog, PROGRAM_TEMPORARY, tempMap); if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } } /** * Remove dead instructions from the given program. * This is very primitive for now. Basically look for temp registers * that are written to but never read. Remove any instructions that * write to such registers. Be careful with condition code setters. */ static void _mesa_remove_dead_code(struct gl_program *prog) { GLboolean tempWritten[MAX_PROGRAM_TEMPS], tempRead[MAX_PROGRAM_TEMPS]; GLboolean *removeInst; /* per-instruction removal flag */ GLuint i, rem; memset(tempWritten, 0, sizeof(tempWritten)); memset(tempRead, 0, sizeof(tempRead)); if (dbg) { _mesa_printf("Optimize: Begin dead code removal\n"); /*_mesa_print_program(prog);*/ } removeInst = (GLboolean *) _mesa_calloc(prog->NumInstructions * sizeof(GLboolean)); /* Determine which temps are read and written */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; /* check src regs */ for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { const GLuint index = inst->SrcReg[j].Index; ASSERT(index < MAX_PROGRAM_TEMPS); if (inst->SrcReg[j].RelAddr) { if (dbg) _mesa_printf("abort remove dead code (indirect temp)\n"); return; } tempRead[index] = GL_TRUE; } } /* check dst reg */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { const GLuint index = inst->DstReg.Index; ASSERT(index < MAX_PROGRAM_TEMPS); if (inst->DstReg.RelAddr) { if (dbg) _mesa_printf("abort remove dead code (indirect temp)\n"); return; } tempWritten[index] = GL_TRUE; if (inst->CondUpdate) { /* If we're writing to this register and setting condition * codes we cannot remove the instruction. Prevent removal * by setting the 'read' flag. */ tempRead[index] = GL_TRUE; } } } if (dbg) { for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { if (tempWritten[i] && !tempRead[i]) _mesa_printf("Remove writes to tmp %u\n", i); } } /* find instructions that write to dead registers, flag for removal */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; if (inst->DstReg.File == PROGRAM_TEMPORARY) { GLint index = inst->DstReg.Index; removeInst[i] = (tempWritten[index] && !tempRead[index]); if (dbg && removeInst[i]) { _mesa_printf("Remove inst %u: ", i); _mesa_print_instruction(inst); } } } /* now remove the instructions which aren't needed */ rem = remove_instructions(prog, removeInst); _mesa_free(removeInst); if (dbg) { _mesa_printf("Optimize: End dead code removal. %u instructions removed\n", rem); /*_mesa_print_program(prog);*/ } } enum temp_use { READ, WRITE, FLOW, END }; /** * Scan forward in program from 'start' for the next occurance of TEMP[index]. * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator * that we can't look further. */ static enum temp_use find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) { GLuint i; for (i = start; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; switch (inst->Opcode) { case OPCODE_BGNLOOP: case OPCODE_ENDLOOP: case OPCODE_BGNSUB: case OPCODE_ENDSUB: return FLOW; default: { const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY && inst->SrcReg[j].Index == index) return READ; } if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == index) return WRITE; } } } return END; } /** * Try to remove extraneous MOV instructions from the given program. */ static void _mesa_remove_extra_moves(struct gl_program *prog) { GLboolean *removeInst; /* per-instruction removal flag */ GLuint i, rem, loopNesting = 0, subroutineNesting = 0; if (dbg) { _mesa_printf("Optimize: Begin remove extra moves\n"); _mesa_print_program(prog); } removeInst = (GLboolean *) _mesa_calloc(prog->NumInstructions * sizeof(GLboolean)); /* * Look for sequences such as this: * FOO tmpX, arg0, arg1; * MOV tmpY, tmpX; * and convert into: * FOO tmpY, arg0, arg1; */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; switch (inst->Opcode) { case OPCODE_BGNLOOP: loopNesting++; break; case OPCODE_ENDLOOP: loopNesting--; break; case OPCODE_BGNSUB: subroutineNesting++; break; case OPCODE_ENDSUB: subroutineNesting--; break; case OPCODE_MOV: if (i > 0 && loopNesting == 0 && subroutineNesting == 0 && inst->SrcReg[0].File == PROGRAM_TEMPORARY && inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) { /* see if this MOV can be removed */ const GLuint tempIndex = inst->SrcReg[0].Index; struct prog_instruction *prevInst; GLuint prevI; /* get pointer to previous instruction */ prevI = i - 1; while (removeInst[prevI] && prevI > 0) prevI--; prevInst = prog->Instructions + prevI; if (prevInst->DstReg.File == PROGRAM_TEMPORARY && prevInst->DstReg.Index == tempIndex && prevInst->DstReg.WriteMask == WRITEMASK_XYZW) { enum temp_use next_use = find_next_temp_use(prog, i + 1, tempIndex); if (next_use == WRITE || next_use == END) { /* OK, we can safely remove this MOV instruction. * Transform: * prevI: FOO tempIndex, x, y; * i: MOV z, tempIndex; * Into: * prevI: FOO z, x, y; */ /* patch up prev inst */ prevInst->DstReg.File = inst->DstReg.File; prevInst->DstReg.Index = inst->DstReg.Index; /* flag this instruction for removal */ removeInst[i] = GL_TRUE; if (dbg) { _mesa_printf("Remove MOV at %u\n", i); _mesa_printf("new prev inst %u: ", prevI); _mesa_print_instruction(prevInst); } } } } break; default: ; /* nothing */ } } /* now remove the instructions which aren't needed */ rem = remove_instructions(prog, removeInst); if (dbg) { _mesa_printf("Optimize: End remove extra moves. %u instructions removed\n", rem); /*_mesa_print_program(prog);*/ } } /** A live register interval */ struct interval { GLuint Reg; /** The temporary register index */ GLuint Start, End; /** Start/end instruction numbers */ }; /** A list of register intervals */ struct interval_list { GLuint Num; struct interval Intervals[MAX_PROGRAM_TEMPS]; }; static void append_interval(struct interval_list *list, const struct interval *inv) { list->Intervals[list->Num++] = *inv; } /** Insert interval inv into list, sorted by interval end */ static void insert_interval_by_end(struct interval_list *list, const struct interval *inv) { /* XXX we could do a binary search insertion here since list is sorted */ GLint i = list->Num - 1; while (i >= 0 && list->Intervals[i].End > inv->End) { list->Intervals[i + 1] = list->Intervals[i]; i--; } list->Intervals[i + 1] = *inv; list->Num++; #ifdef DEBUG { GLuint i; for (i = 0; i + 1 < list->Num; i++) { ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); } } #endif } /** Remove the given interval from the interval list */ static void remove_interval(struct interval_list *list, const struct interval *inv) { /* XXX we could binary search since list is sorted */ GLuint k; for (k = 0; k < list->Num; k++) { if (list->Intervals[k].Reg == inv->Reg) { /* found, remove it */ ASSERT(list->Intervals[k].Start == inv->Start); ASSERT(list->Intervals[k].End == inv->End); while (k < list->Num - 1) { list->Intervals[k] = list->Intervals[k + 1]; k++; } list->Num--; return; } } } /** called by qsort() */ static int compare_start(const void *a, const void *b) { const struct interval *ia = (const struct interval *) a; const struct interval *ib = (const struct interval *) b; if (ia->Start < ib->Start) return -1; else if (ia->Start > ib->Start) return +1; else return 0; } /** sort the interval list according to interval starts */ static void sort_interval_list_by_start(struct interval_list *list) { qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); #ifdef DEBUG { GLuint i; for (i = 0; i + 1 < list->Num; i++) { ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); } } #endif } /** * Update the intermediate interval info for register 'index' and * instruction 'ic'. */ static void update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) { ASSERT(index < MAX_PROGRAM_TEMPS); if (intBegin[index] == -1) { ASSERT(intEnd[index] == -1); intBegin[index] = intEnd[index] = ic; } else { intEnd[index] = ic; } } /** * Find first/last instruction that references each temporary register. */ GLboolean _mesa_find_temp_intervals(const struct prog_instruction *instructions, GLuint numInstructions, GLint intBegin[MAX_PROGRAM_TEMPS], GLint intEnd[MAX_PROGRAM_TEMPS]) { struct loop_info { GLuint Start, End; /**< Start, end instructions of loop */ }; struct loop_info loopStack[MAX_LOOP_NESTING]; GLuint loopStackDepth = 0; GLuint i; for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ intBegin[i] = intEnd[i] = -1; } /* Scan instructions looking for temporary registers */ for (i = 0; i < numInstructions; i++) { const struct prog_instruction *inst = instructions + i; if (inst->Opcode == OPCODE_BGNLOOP) { loopStack[loopStackDepth].Start = i; loopStack[loopStackDepth].End = inst->BranchTarget; loopStackDepth++; } else if (inst->Opcode == OPCODE_ENDLOOP) { loopStackDepth--; } else if (inst->Opcode == OPCODE_CAL) { return GL_FALSE; } else { const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { const GLuint index = inst->SrcReg[j].Index; if (inst->SrcReg[j].RelAddr) return GL_FALSE; update_interval(intBegin, intEnd, index, i); if (loopStackDepth > 0) { /* extend temp register's interval to end of loop */ GLuint loopEnd = loopStack[loopStackDepth - 1].End; update_interval(intBegin, intEnd, index, loopEnd); } } } if (inst->DstReg.File == PROGRAM_TEMPORARY) { const GLuint index = inst->DstReg.Index; if (inst->DstReg.RelAddr) return GL_FALSE; update_interval(intBegin, intEnd, index, i); if (loopStackDepth > 0) { /* extend temp register's interval to end of loop */ GLuint loopEnd = loopStack[loopStackDepth - 1].End; update_interval(intBegin, intEnd, index, loopEnd); } } } } return GL_TRUE; } /** * Find the live intervals for each temporary register in the program. * For register R, the interval [A,B] indicates that R is referenced * from instruction A through instruction B. * Special consideration is needed for loops and subroutines. * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason */ static GLboolean find_live_intervals(struct gl_program *prog, struct interval_list *liveIntervals) { GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; GLuint i; /* * Note: we'll return GL_FALSE below if we find relative indexing * into the TEMP register file. We can't handle that yet. * We also give up on subroutines for now. */ if (dbg) { _mesa_printf("Optimize: Begin find intervals\n"); } /* build intermediate arrays */ if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, intBegin, intEnd)) return GL_FALSE; /* Build live intervals list from intermediate arrays */ liveIntervals->Num = 0; for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { if (intBegin[i] >= 0) { struct interval inv; inv.Reg = i; inv.Start = intBegin[i]; inv.End = intEnd[i]; append_interval(liveIntervals, &inv); } } /* Sort the list according to interval starts */ sort_interval_list_by_start(liveIntervals); if (dbg) { /* print interval info */ for (i = 0; i < liveIntervals->Num; i++) { const struct interval *inv = liveIntervals->Intervals + i; _mesa_printf("Reg[%d] live [%d, %d]:", inv->Reg, inv->Start, inv->End); if (1) { int j; for (j = 0; j < inv->Start; j++) _mesa_printf(" "); for (j = inv->Start; j <= inv->End; j++) _mesa_printf("x"); } _mesa_printf("\n"); } } return GL_TRUE; } /** Scan the array of used register flags to find free entry */ static GLint alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) { GLuint k; for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { if (!usedRegs[k]) { usedRegs[k] = GL_TRUE; return k; } } return -1; } /** * This function implements "Linear Scan Register Allocation" to reduce * the number of temporary registers used by the program. * * We compute the "live interval" for all temporary registers then * examine the overlap of the intervals to allocate new registers. * Basically, if two intervals do not overlap, they can use the same register. */ static void _mesa_reallocate_registers(struct gl_program *prog) { struct interval_list liveIntervals; GLint registerMap[MAX_PROGRAM_TEMPS]; GLboolean usedRegs[MAX_PROGRAM_TEMPS]; GLuint i; GLint maxTemp = -1; if (dbg) { _mesa_printf("Optimize: Begin live-interval register reallocation\n"); _mesa_print_program(prog); } for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ registerMap[i] = -1; usedRegs[i] = GL_FALSE; } if (!find_live_intervals(prog, &liveIntervals)) { if (dbg) _mesa_printf("Aborting register reallocation\n"); return; } { struct interval_list activeIntervals; activeIntervals.Num = 0; /* loop over live intervals, allocating a new register for each */ for (i = 0; i < liveIntervals.Num; i++) { const struct interval *live = liveIntervals.Intervals + i; if (dbg) _mesa_printf("Consider register %u\n", live->Reg); /* Expire old intervals. Intervals which have ended with respect * to the live interval can have their remapped registers freed. */ { GLint j; for (j = 0; j < activeIntervals.Num; j++) { const struct interval *inv = activeIntervals.Intervals + j; if (inv->End >= live->Start) { /* Stop now. Since the activeInterval list is sorted * we know we don't have to go further. */ break; } else { /* Interval 'inv' has expired */ const GLint regNew = registerMap[inv->Reg]; ASSERT(regNew >= 0); if (dbg) _mesa_printf(" expire interval for reg %u\n", inv->Reg); /* remove interval j from active list */ remove_interval(&activeIntervals, inv); j--; /* counter-act j++ in for-loop above */ /* return register regNew to the free pool */ if (dbg) _mesa_printf(" free reg %d\n", regNew); ASSERT(usedRegs[regNew] == GL_TRUE); usedRegs[regNew] = GL_FALSE; } } } /* find a free register for this live interval */ { const GLint k = alloc_register(usedRegs); if (k < 0) { /* out of registers, give up */ return; } registerMap[live->Reg] = k; maxTemp = MAX2(maxTemp, k); if (dbg) _mesa_printf(" remap register %u -> %d\n", live->Reg, k); } /* Insert this live interval into the active list which is sorted * by increasing end points. */ insert_interval_by_end(&activeIntervals, live); } } if (maxTemp + 1 < liveIntervals.Num) { /* OK, we've reduced the number of registers needed. * Scan the program and replace all the old temporary register * indexes with the new indexes. */ replace_regs(prog, PROGRAM_TEMPORARY, registerMap); prog->NumTemporaries = maxTemp + 1; } if (dbg) { _mesa_printf("Optimize: End live-interval register reallocation\n"); _mesa_printf("Num temp regs before: %u after: %u\n", liveIntervals.Num, maxTemp + 1); _mesa_print_program(prog); } } /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. */ void _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) { if (1) _mesa_remove_dead_code(program); if (0) /* not tested much yet */ _mesa_remove_extra_moves(program); if (0) _mesa_consolidate_registers(program); else _mesa_reallocate_registers(program); }