diff options
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_program.c')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_program.c | 61 |
1 files changed, 42 insertions, 19 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 50a853abed9..f228d07bf6b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c | |||
@@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) | |||
152 | static int | 152 | static int |
153 | nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) | 153 | nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) |
154 | { | 154 | { |
155 | unsigned last = info->prop.fp.numColourResults * 4; | 155 | unsigned count = info->prop.fp.numColourResults * 4; |
156 | unsigned i, c; | 156 | unsigned i, c; |
157 | 157 | ||
158 | for (i = 0; i < info->numOutputs; ++i) | 158 | for (i = 0; i < info->numOutputs; ++i) |
@@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) | |||
161 | info->out[i].slot[c] = info->out[i].si * 4 + c; | 161 | info->out[i].slot[c] = info->out[i].si * 4 + c; |
162 | 162 | ||
163 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) | 163 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
164 | info->out[info->io.sampleMask].slot[0] = last++; | 164 | info->out[info->io.sampleMask].slot[0] = count++; |
165 | else | ||
166 | if (info->target >= 0xe0) | ||
167 | count++; /* on Kepler, depth is always last colour reg + 2 */ | ||
165 | 168 | ||
166 | if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) | 169 | if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) |
167 | info->out[info->io.fragDepth].slot[2] = last; | 170 | info->out[info->io.fragDepth].slot[2] = count; |
168 | 171 | ||
169 | return 0; | 172 | return 0; |
170 | } | 173 | } |
@@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) | |||
278 | vp->vp.clip_mode |= 1 << (i * 4); | 281 | vp->vp.clip_mode |= 1 << (i * 4); |
279 | 282 | ||
280 | if (info->io.genUserClip < 0) | 283 | if (info->io.genUserClip < 0) |
281 | vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */ | 284 | vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ |
282 | 285 | ||
283 | return 0; | 286 | return 0; |
284 | } | 287 | } |
@@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) | |||
434 | { | 437 | { |
435 | unsigned i, c, a, m; | 438 | unsigned i, c, a, m; |
436 | 439 | ||
440 | /* just 00062 on Kepler */ | ||
437 | fp->hdr[0] = 0x20062 | (5 << 10); | 441 | fp->hdr[0] = 0x20062 | (5 << 10); |
438 | fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ | 442 | fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ |
439 | 443 | ||
@@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog) | |||
538 | #endif | 542 | #endif |
539 | 543 | ||
540 | boolean | 544 | boolean |
541 | nvc0_program_translate(struct nvc0_program *prog) | 545 | nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) |
542 | { | 546 | { |
543 | struct nv50_ir_prog_info *info; | 547 | struct nv50_ir_prog_info *info; |
544 | int ret; | 548 | int ret; |
@@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog) | |||
548 | return FALSE; | 552 | return FALSE; |
549 | 553 | ||
550 | info->type = prog->type; | 554 | info->type = prog->type; |
551 | info->target = 0xc0; | 555 | info->target = chipset; |
552 | info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; | 556 | info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; |
553 | info->bin.source = (void *)prog->pipe.tokens; | 557 | info->bin.source = (void *)prog->pipe.tokens; |
554 | 558 | ||
555 | info->io.genUserClip = prog->vp.num_ucps; | 559 | info->io.genUserClip = prog->vp.num_ucps; |
560 | info->io.ucpBase = 256; | ||
561 | info->io.ucpBinding = 15; | ||
556 | 562 | ||
557 | info->assignSlots = nvc0_program_assign_varying_slots; | 563 | info->assignSlots = nvc0_program_assign_varying_slots; |
558 | 564 | ||
@@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
655 | size = align(size, 0x40); | 661 | size = align(size, 0x40); |
656 | size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ | 662 | size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ |
657 | } | 663 | } |
658 | size = align(size, 0x40); /* required by SP_START_ID */ | 664 | /* On Fermi, SP_START_ID must be aligned to 0x40. |
665 | * On Kepler, the first instruction must be aligned to 0x80 because | ||
666 | * latency information is expected only at certain positions. | ||
667 | */ | ||
668 | if (screen->base.class_3d >= NVE4_3D_CLASS) | ||
669 | size = size + 0x70; | ||
670 | size = align(size, 0x40); | ||
659 | 671 | ||
660 | ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); | 672 | ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); |
661 | if (ret) { | 673 | if (ret) { |
@@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
667 | assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= | 679 | assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= |
668 | prog->mem->start + prog->mem->size)); | 680 | prog->mem->start + prog->mem->size)); |
669 | 681 | ||
682 | if (screen->base.class_3d >= NVE4_3D_CLASS) { | ||
683 | switch (prog->mem->start & 0xff) { | ||
684 | case 0x40: prog->code_base += 0x70; break; | ||
685 | case 0x80: prog->code_base += 0x30; break; | ||
686 | case 0xc0: prog->code_base += 0x70; break; | ||
687 | default: | ||
688 | prog->code_base += 0x30; | ||
689 | assert((prog->mem->start & 0xff) == 0x00); | ||
690 | break; | ||
691 | } | ||
692 | } | ||
670 | code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; | 693 | code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; |
671 | 694 | ||
672 | if (prog->relocs) | 695 | if (prog->relocs) |
@@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
677 | nvc0_program_dump(prog); | 700 | nvc0_program_dump(prog); |
678 | #endif | 701 | #endif |
679 | 702 | ||
680 | nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, | 703 | nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, |
681 | NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); | 704 | NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); |
682 | nvc0_m2mf_push_linear(&nvc0->base, screen->text, | 705 | nvc0->base.push_data(&nvc0->base, screen->text, |
683 | prog->code_base + NVC0_SHADER_HEADER_SIZE, | 706 | prog->code_base + NVC0_SHADER_HEADER_SIZE, |
684 | NOUVEAU_BO_VRAM, prog->code_size, prog->code); | 707 | NOUVEAU_BO_VRAM, prog->code_size, prog->code); |
685 | if (prog->immd_size) | 708 | if (prog->immd_size) |
686 | nvc0_m2mf_push_linear(&nvc0->base, | 709 | nvc0->base.push_data(&nvc0->base, |
687 | screen->text, prog->immd_base, NOUVEAU_BO_VRAM, | 710 | screen->text, prog->immd_base, NOUVEAU_BO_VRAM, |
688 | prog->immd_size, prog->immd_data); | 711 | prog->immd_size, prog->immd_data); |
689 | 712 | ||
690 | BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); | 713 | BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); |
691 | PUSH_DATA (nvc0->base.pushbuf, 0x1111); | 714 | PUSH_DATA (nvc0->base.pushbuf, 0x1011); |
692 | 715 | ||
693 | return TRUE; | 716 | return TRUE; |
694 | } | 717 | } |
@@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0) | |||
714 | if (ret) | 737 | if (ret) |
715 | return; | 738 | return; |
716 | 739 | ||
717 | nvc0_m2mf_push_linear(&nvc0->base, | 740 | nvc0->base.push_data(&nvc0->base, |
718 | screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, | 741 | screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, |
719 | size, code); | 742 | size, code); |
720 | /* no need for a memory barrier, will be emitted with first program */ | 743 | /* no need for a memory barrier, will be emitted with first program */ |
721 | } | 744 | } |
722 | 745 | ||