summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/nvc0/nvc0_program.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_program.c')
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c 61
1 files changed, 42 insertions, 19 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index 50a853abed9..f228d07bf6b 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
152static int 152static int
153nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) 153nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
154{ 154{
155 unsigned last = info->prop.fp.numColourResults * 4; 155 unsigned count = info->prop.fp.numColourResults * 4;
156 unsigned i, c; 156 unsigned i, c;
157 157
158 for (i = 0; i < info->numOutputs; ++i) 158 for (i = 0; i < info->numOutputs; ++i)
@@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
161 info->out[i].slot[c] = info->out[i].si * 4 + c; 161 info->out[i].slot[c] = info->out[i].si * 4 + c;
162 162
163 if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) 163 if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
164 info->out[info->io.sampleMask].slot[0] = last++; 164 info->out[info->io.sampleMask].slot[0] = count++;
165 else
166 if (info->target >= 0xe0)
167 count++; /* on Kepler, depth is always last colour reg + 2 */
165 168
166 if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) 169 if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
167 info->out[info->io.fragDepth].slot[2] = last; 170 info->out[info->io.fragDepth].slot[2] = count;
168 171
169 return 0; 172 return 0;
170} 173}
@@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
278 vp->vp.clip_mode |= 1 << (i * 4); 281 vp->vp.clip_mode |= 1 << (i * 4);
279 282
280 if (info->io.genUserClip < 0) 283 if (info->io.genUserClip < 0)
281 vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */ 284 vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
282 285
283 return 0; 286 return 0;
284} 287}
@@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
434{ 437{
435 unsigned i, c, a, m; 438 unsigned i, c, a, m;
436 439
440 /* just 00062 on Kepler */
437 fp->hdr[0] = 0x20062 | (5 << 10); 441 fp->hdr[0] = 0x20062 | (5 << 10);
438 fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ 442 fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
439 443
@@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog)
538#endif 542#endif
539 543
540boolean 544boolean
541nvc0_program_translate(struct nvc0_program *prog) 545nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
542{ 546{
543 struct nv50_ir_prog_info *info; 547 struct nv50_ir_prog_info *info;
544 int ret; 548 int ret;
@@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog)
548 return FALSE; 552 return FALSE;
549 553
550 info->type = prog->type; 554 info->type = prog->type;
551 info->target = 0xc0; 555 info->target = chipset;
552 info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; 556 info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
553 info->bin.source = (void *)prog->pipe.tokens; 557 info->bin.source = (void *)prog->pipe.tokens;
554 558
555 info->io.genUserClip = prog->vp.num_ucps; 559 info->io.genUserClip = prog->vp.num_ucps;
560 info->io.ucpBase = 256;
561 info->io.ucpBinding = 15;
556 562
557 info->assignSlots = nvc0_program_assign_varying_slots; 563 info->assignSlots = nvc0_program_assign_varying_slots;
558 564
@@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
655 size = align(size, 0x40); 661 size = align(size, 0x40);
656 size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ 662 size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
657 } 663 }
658 size = align(size, 0x40); /* required by SP_START_ID */ 664 /* On Fermi, SP_START_ID must be aligned to 0x40.
665 * On Kepler, the first instruction must be aligned to 0x80 because
666 * latency information is expected only at certain positions.
667 */
668 if (screen->base.class_3d >= NVE4_3D_CLASS)
669 size = size + 0x70;
670 size = align(size, 0x40);
659 671
660 ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); 672 ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
661 if (ret) { 673 if (ret) {
@@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
667 assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= 679 assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
668 prog->mem->start + prog->mem->size)); 680 prog->mem->start + prog->mem->size));
669 681
682 if (screen->base.class_3d >= NVE4_3D_CLASS) {
683 switch (prog->mem->start & 0xff) {
684 case 0x40: prog->code_base += 0x70; break;
685 case 0x80: prog->code_base += 0x30; break;
686 case 0xc0: prog->code_base += 0x70; break;
687 default:
688 prog->code_base += 0x30;
689 assert((prog->mem->start & 0xff) == 0x00);
690 break;
691 }
692 }
670 code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; 693 code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
671 694
672 if (prog->relocs) 695 if (prog->relocs)
@@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
677 nvc0_program_dump(prog); 700 nvc0_program_dump(prog);
678#endif 701#endif
679 702
680 nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, 703 nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
681 NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); 704 NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
682 nvc0_m2mf_push_linear(&nvc0->base, screen->text, 705 nvc0->base.push_data(&nvc0->base, screen->text,
683 prog->code_base + NVC0_SHADER_HEADER_SIZE, 706 prog->code_base + NVC0_SHADER_HEADER_SIZE,
684 NOUVEAU_BO_VRAM, prog->code_size, prog->code); 707 NOUVEAU_BO_VRAM, prog->code_size, prog->code);
685 if (prog->immd_size) 708 if (prog->immd_size)
686 nvc0_m2mf_push_linear(&nvc0->base, 709 nvc0->base.push_data(&nvc0->base,
687 screen->text, prog->immd_base, NOUVEAU_BO_VRAM, 710 screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
688 prog->immd_size, prog->immd_data); 711 prog->immd_size, prog->immd_data);
689 712
690 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); 713 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
691 PUSH_DATA (nvc0->base.pushbuf, 0x1111); 714 PUSH_DATA (nvc0->base.pushbuf, 0x1011);
692 715
693 return TRUE; 716 return TRUE;
694} 717}
@@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0)
714 if (ret) 737 if (ret)
715 return; 738 return;
716 739
717 nvc0_m2mf_push_linear(&nvc0->base, 740 nvc0->base.push_data(&nvc0->base,
718 screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, 741 screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
719 size, code); 742 size, code);
720 /* no need for a memory barrier, will be emitted with first program */ 743 /* no need for a memory barrier, will be emitted with first program */
721} 744}
722 745