diff options
author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-07-03 14:27:36 +0200 |
---|---|---|
committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-07-03 14:27:36 +0200 |
commit | 8573280871401e29f27fa24d0b7c6ac5e35b2cc1 (patch) | |
tree | ec1b7260de8ab21178405c050bc0efa9f83b5ee7 /src | |
parent | db98ad23633958e52f84501c5a2061d42d346b64 (diff) |
nvc0: initial implementation
EXA should work, Xv not yet done.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/drmmode_display.c | 24 | ||||
-rw-r--r-- | src/nouveau_class.h | 77 | ||||
-rw-r--r-- | src/nouveau_exa.c | 67 | ||||
-rw-r--r-- | src/nouveau_wfb.c | 3 | ||||
-rw-r--r-- | src/nv_accel_common.c | 26 | ||||
-rw-r--r-- | src/nv_dma.c | 17 | ||||
-rw-r--r-- | src/nv_driver.c | 24 | ||||
-rw-r--r-- | src/nv_proto.h | 25 | ||||
-rw-r--r-- | src/nv_type.h | 5 | ||||
-rw-r--r-- | src/nvc0_accel.c | 758 | ||||
-rw-r--r-- | src/nvc0_accel.h | 83 | ||||
-rw-r--r-- | src/nvc0_exa.c | 1239 |
13 files changed, 2288 insertions, 62 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 9340c45..8f1f704 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -58,6 +58,8 @@ nouveau_drv_la_SOURCES = \ nv50_exa.c \ nv50_xv.c \ nv50_texture.h \ + nvc0_accel.c nvc0_accel.h \ + nvc0_exa.c \ drmmode_display.c \ vl_hwmc.c \ vl_hwmc.h diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 9b5d52d..fb0fa50 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -420,8 +420,13 @@ drmmode_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) if (pNv->Architecture >= NV_ARCH_50) { tile_mode = 4; - tile_flags = (drmmode->cpp == 2) ? 0x7000 : 0x7a00; - ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 2)); + if (pNv->Architecture == NV_ARCH_C0) { + tile_flags = 0xfe0; + ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 3)); + } else { + tile_flags = (drmmode->cpp == 2) ? 0x7000 : 0x7a00; + ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 2)); + } pitch = NOUVEAU_ALIGN(width * drmmode->cpp, 64); } else { pitch = nv_pitch_align(pNv, width, crtc->scrn->depth); @@ -429,7 +434,8 @@ drmmode_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) } drmmode_crtc->rotate_pitch = pitch; - ret = nouveau_bo_new_tile(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 0, + ret = nouveau_bo_new_tile(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, + 1 << 17, drmmode_crtc->rotate_pitch * ah, tile_mode, tile_flags, &drmmode_crtc->rotate_bo); if (ret) { @@ -1018,8 +1024,14 @@ drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) if (pNv->Architecture >= NV_ARCH_50 && pNv->wfb_enabled) { tile_mode = 4; - tile_flags = (scrn->bitsPerPixel == 16) ? 0x7000 : 0x7a00; - ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 2)); + if (pNv->Architecture == NV_ARCH_C0) { + tile_flags = 0xfe0; + ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 3)); + } else { + tile_flags = + (scrn->bitsPerPixel == 16) ? 0x7000 : 0x7a00; + ah = NOUVEAU_ALIGN(height, 1 << (tile_mode + 2)); + } pitch = NOUVEAU_ALIGN(width * (scrn->bitsPerPixel >> 3), 64); } else { pitch = nv_pitch_align(pNv, width, scrn->depth); @@ -1038,7 +1050,7 @@ drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) scrn->displayWidth = pitch / (scrn->bitsPerPixel >> 3); ret = nouveau_bo_new_tile(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, - 0, pitch * ah, tile_mode, tile_flags, + 1 << 17, pitch * ah, tile_mode, tile_flags, &pNv->scanout); if (ret) goto fail; diff --git a/src/nouveau_class.h b/src/nouveau_class.h index c03e633..688158a 100644 --- a/src/nouveau_class.h +++ b/src/nouveau_class.h @@ -735,43 +735,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c -#define NVC0_MEMORY_TO_MEMORY_FORMAT 0x00009039 - -#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_SERIALIZE 0x00000110 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN 0x00000204 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN 0x00000208 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN 0x0000020c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN 0x00000210 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z 0x00000214 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT 0x00000220 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT 0x00000224 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT 0x00000228 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT 0x0000022c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z 0x00000230 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x00000238 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_LOW 0x0000023c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC 0x00000300 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_PUSH (1 << 0) -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_IN (1 << 4) -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_OUT (1 << 8) -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_NOTIFY (1 << 13) -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_SHIFT 20 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_MASK 0x00f00000 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_DATA 0x00000304 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x0000030c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_LOW 0x00000310 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_HIGH 0x0000032c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_LOW 0x00000330 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000334 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X 0x00000344 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y 0x00000348 -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X 0x0000034c -#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y 0x00000350 +#define NVC0_M2MF 0x00009039 + +#define NVC0_M2MF_NOP 0x00000100 +#define NVC0_M2MF_SERIALIZE 0x00000110 +#define NVC0_M2MF_TILING_MODE_IN 0x00000204 +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c +#define NVC0_M2MF_EXEC 0x00000300 +#define NVC0_M2MF_EXEC_PUSH (1 << 0) +#define NVC0_M2MF_EXEC_LINEAR_IN (1 << 4) +#define NVC0_M2MF_EXEC_LINEAR_OUT (1 << 8) +#define NVC0_M2MF_EXEC_NOTIFY (1 << 13) +#define NVC0_M2MF_EXEC_INC_SHIFT 20 +#define NVC0_M2MF_EXEC_INC_MASK 0x00f00000 +#define NVC0_M2MF_DATA 0x00000304 +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 +#define NVC0_M2MF_PITCH_IN 0x00000314 +#define NVC0_M2MF_PITCH_OUT 0x00000318 +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c +#define NVC0_M2MF_LINE_COUNT 0x00000320 +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 +#define NVC0_M2MF_NOTIFY 0x00000334 +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 #define NV01_MEMORY_LOCAL_BANKED 0x0000003d @@ -9120,6 +9120,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0TCL_NOTIFY 0x0000010c #define NVC0TCL_SERIALIZE 0x00000110 #define NVC0TCL_EARLY_FRAGMENT_TESTS 0x00000210 +#define NVC0TCL_CODE_FLUSH 0x0000021c #define NVC0TCL_TESS_MODE 0x00000320 #define NVC0TCL_TESS_MODE_PRIM_SHIFT 0 #define NVC0TCL_TESS_MODE_PRIM_MASK 0x0000000f @@ -9554,6 +9555,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901 #define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902 #define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903 +#define NVC0TCL_BLEND_ENABLE(x) (0x00001360+((x)*4)) +#define NVC0TCL_BLEND_ENABLE__SIZE 0x00000008 #define NVC0TCL_STENCIL_ENABLE 0x00001380 #define NVC0TCL_STENCIL_FRONT_OP_FAIL 0x00001384 #define NVC0TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index 1b8888e..d07d94a 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -343,12 +343,26 @@ nouveau_exa_create_pixmap(ScreenPtr pScreen, int width, int height, int depth, else if (height > 4) tile_mode = 1; else tile_mode = 0; - if (usage_hint & NOUVEAU_CREATE_PIXMAP_ZETA) - tile_flags = 0x2800; - else - tile_flags = 0x7000; + if (pNv->Architecture >= NV_ARCH_C0) { + if (tile_mode && height <= 64) + tile_mode -= 1; - height = NOUVEAU_ALIGN(height, 1 << (tile_mode + 2)); + if (usage_hint & NOUVEAU_CREATE_PIXMAP_ZETA) + tile_flags = 0xdb0; + else + tile_flags = 0xfe0; + + height = NOUVEAU_ALIGN( + height, 1 << (tile_mode + 3)); + } else { + if (usage_hint & NOUVEAU_CREATE_PIXMAP_ZETA) + tile_flags = 0x2800; + else + tile_flags = 0x7000; + + height = NOUVEAU_ALIGN( + height, 1 << (tile_mode + 2)); + } } else { if (usage_hint & NOUVEAU_CREATE_PIXMAP_TILED) { int pitch_align = @@ -366,6 +380,10 @@ nouveau_exa_create_pixmap(ScreenPtr pScreen, int width, int height, int depth, *new_pitch = NOUVEAU_ALIGN(*new_pitch, 64); size = *new_pitch * height; + xf86DrvMsg(xf86Screens[pScreen->myNum]->scrnIndex, X_INFO, + "exaCreatePixmap: tile_mode=%x tile_flags=%x size=%x\n", + tile_mode, tile_flags, size); + ret = nouveau_bo_new_tile(pNv->dev, flags, 0, size, tile_mode, tile_flags, &nvpix->bo); if (ret) { @@ -394,7 +412,7 @@ nv50_style_tiled_pixmap(PixmapPtr ppix) ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum]; NVPtr pNv = NVPTR(pScrn); - return pNv->Architecture == NV_ARCH_50 && + return pNv->Architecture >= NV_ARCH_50 && nouveau_pixmap_bo(ppix)->tile_flags; } @@ -414,6 +432,10 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h, offset = (y * src_pitch) + (x * cpp); if (pNv->GART) { + if ((pNv->Architecture >= NV_ARCH_C0) && + NVC0AccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch)) + return TRUE; + else if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch)) return TRUE; } @@ -450,17 +472,29 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h, exaMarkSync(pdpix->drawable.pScreen); return TRUE; } - } else { + } else + if (pNv->Architecture < NV_ARCH_C0) { if (NV50EXAUploadSIFC(src, src_pitch, pdpix, x, y, w, h, cpp)) { exaMarkSync(pdpix->drawable.pScreen); return TRUE; } + } else { + if (NVC0EXAUploadSIFC(src, src_pitch, pdpix, + x, y, w, h, cpp)) { + exaMarkSync(pdpix->drawable.pScreen); + return TRUE; + } } } /* try gart-based transfer */ if (pNv->GART) { + if ((pNv->Architecture >= NV_ARCH_C0) && + NVC0AccelUploadM2MF(pdpix, x, y, w, h, src, src_pitch)) { + exaMarkSync(pdpix->drawable.pScreen); + return TRUE; + } else if (NVAccelUploadM2MF(pdpix, x, y, w, h, src, src_pitch)) { exaMarkSync(pdpix->drawable.pScreen); return TRUE; @@ -501,6 +535,8 @@ nouveau_exa_init(ScreenPtr pScreen) return FALSE; } + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "nouveau EXA init\n"); + exa->exa_major = EXA_VERSION_MAJOR; exa->exa_minor = EXA_VERSION_MINOR; exa->flags = EXA_OFFSCREEN_PIXMAPS; @@ -582,12 +618,29 @@ nouveau_exa_init(ScreenPtr pScreen) exa->Composite = NV50EXAComposite; exa->DoneComposite = NV50EXADoneComposite; break; + case NV_ARCH_C0: + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "EXA func pointers for NVC0\n"); + exa->PrepareCopy = NVC0EXAPrepareCopy; + exa->Copy = NVC0EXACopy; + exa->DoneCopy = NVC0EXADoneCopy; + + exa->PrepareSolid = NVC0EXAPrepareSolid; + exa->Solid = NVC0EXASolid; + exa->DoneSolid = NVC0EXADoneSolid; + + exa->CheckComposite = NVC0EXACheckComposite; + exa->PrepareComposite = NVC0EXAPrepareComposite; + exa->Composite = NVC0EXAComposite; + exa->DoneComposite = NVC0EXADoneComposite; + break; default: break; } if (!exaDriverInit(pScreen, exa)) return FALSE; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "exaDriverInit successful\n"); pNv->EXADriverPtr = exa; return TRUE; diff --git a/src/nouveau_wfb.c b/src/nouveau_wfb.c index 7575672..1f62a50 100644 --- a/src/nouveau_wfb.c +++ b/src/nouveau_wfb.c @@ -180,6 +180,9 @@ nouveau_wfb_setup_wrap(ReadMemoryProcPtr *pRead, WriteMemoryProcPtr *pWrite, wfb->tile_height = bo->tile_mode + 2; wfb->horiz_tiles = wfb->pitch / 64; have_tiled = 1; + + if (1 /* NV_ARCH_C0, FIXME */) + wfb->tile_height += 1; } out: diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c index 869a6d6..89a0301 100644 --- a/src/nv_accel_common.c +++ b/src/nv_accel_common.c @@ -497,6 +497,8 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) if (pNv->NoAccel) return TRUE; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "NVAccelCommonInit\n"); + /* General engine objects */ INIT_CONTEXT_OBJECT(DmaNotifier0); @@ -512,13 +514,23 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) INIT_CONTEXT_OBJECT(ScaledImage); INIT_CONTEXT_OBJECT(ClipRectangle); INIT_CONTEXT_OBJECT(ImageFromCpu); - } else { + } else + if (pNv->Architecture < NV_ARCH_C0) { INIT_CONTEXT_OBJECT(2D_NV50); + } else { + INIT_CONTEXT_OBJECT(2D_NVC0); } - INIT_CONTEXT_OBJECT(MemFormat); + + if (pNv->Architecture < NV_ARCH_C0) + INIT_CONTEXT_OBJECT(MemFormat); + else + INIT_CONTEXT_OBJECT(M2MF_NVC0); /* 3D init */ switch (pNv->Architecture) { + case NV_ARCH_C0: + INIT_CONTEXT_OBJECT(NVC0TCL); + break; case NV_ARCH_50: INIT_CONTEXT_OBJECT(NV50TCL); break; @@ -561,11 +573,15 @@ void NVAccelFree(ScrnInfoPtr pScrn) nouveau_grobj_free(&pNv->NvClipRectangle); nouveau_grobj_free(&pNv->NvImageFromCpu); } else + if (pNv->Architecture < NV_ARCH_C0) nouveau_grobj_free(&pNv->Nv2D); - nouveau_grobj_free(&pNv->NvMemFormat); - nouveau_grobj_free(&pNv->NvSW); - nouveau_grobj_free(&pNv->Nv3D); + if (pNv->Architecture < NV_ARCH_C0) { + nouveau_grobj_free(&pNv->NvMemFormat); + + nouveau_grobj_free(&pNv->NvSW); + nouveau_grobj_free(&pNv->Nv3D); + } nouveau_bo_ref(NULL, &pNv->tesla_scratch); nouveau_bo_ref(NULL, &pNv->shader_mem); diff --git a/src/nv_dma.c b/src/nv_dma.c index c3d7639..975bfeb 100644 --- a/src/nv_dma.c +++ b/src/nv_dma.c @@ -54,12 +54,21 @@ void NVSync(ScrnInfoPtr pScrn) if (pNv->NoAccel) return; + return; /* XXX */ + /* Wait for nvchannel to go completely idle */ nouveau_notifier_reset(pNv->notify0, 0); - BEGIN_RING(chan, gr, 0x104, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, gr, 0x100, 1); - OUT_RING (chan, 0); + if (pNv->Architecture >= NV_ARCH_C0) { + BEGIN_RING_NVC0(chan, NvSub2D, 0x0104, 1); + OUT_RING (chan, 0); + BEGIN_RING_NVC0(chan, NvSub2D, 0x0100, 1); + OUT_RING (chan, 0); + } else { + BEGIN_RING(chan, gr, 0x104, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, gr, 0x100, 1); + OUT_RING (chan, 0); + } FIRE_RING (chan); if (nouveau_notifier_wait_status(pNv->notify0, 0, NV_NOTIFY_STATE_STATUS_COMPLETED, 2.0)) diff --git a/src/nv_driver.c b/src/nv_driver.c index 0abe400..f83762f 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -109,6 +109,8 @@ static struct NvFamily NVKnownFamilies[] = { "GeForce 6", "NV4x" }, { "GeForce 7", "G7x" }, { "GeForce 8", "G8x" }, + { "GeForce GTX 200", "NVA0" }, + { "GeForce GTX 400", "NVC0" }, { NULL, NULL} }; @@ -254,6 +256,7 @@ NVPciProbe(DriverPtr drv, int entity_num, struct pci_device *pci_dev, case 0x80: case 0x90: case 0xa0: + case 0xc0: break; default: xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02x\n", chipset); @@ -652,6 +655,9 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) case 0xa0: pNv->Architecture = NV_ARCH_50; break; + case 0xc0: + pNv->Architecture = NV_ARCH_C0; + break; default: return FALSE; } @@ -845,14 +851,26 @@ NVMapMem(ScrnInfoPtr pScrn) size = pScrn->displayWidth * (pScrn->bitsPerPixel >> 3); if (pNv->Architecture >= NV_ARCH_50 && pNv->tiled_scanout) { tile_mode = 4; - tile_flags = pScrn->bitsPerPixel == 16 ? 0x7000 : 0x7a00; - size *= NOUVEAU_ALIGN(pScrn->virtualY, (1 << (tile_mode + 2))); + if (pNv->Architecture == NV_ARCH_C0) { + tile_flags = 0xfe0; + size *= NOUVEAU_ALIGN(pScrn->virtualY, + (1 << (tile_mode + 3))); + } else { + tile_flags = + pScrn->bitsPerPixel == 16 ? 0x7000 : 0x7a00; + size *= NOUVEAU_ALIGN(pScrn->virtualY, + (1 << (tile_mode + 2))); + } } else { size *= pScrn->virtualY; } + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "new scanout bo: tile_mode=%x, tile_flags=%x\n", + tile_mode, tile_flags); + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, - 0, size, tile_mode, tile_flags, + 1 << 17, size, tile_mode, tile_flags, &pNv->scanout); if (ret) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, diff --git a/src/nv_proto.h b/src/nv_proto.h index d6791ef..8d3a988 100644 --- a/src/nv_proto.h +++ b/src/nv_proto.h @@ -127,6 +127,11 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer); /* in nv50_accel.c */ Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn); +/* in nvc0_accel.c */ +Bool NVAccelInitNVC0TCL(ScrnInfoPtr pScrn); +Bool NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn); +Bool NVAccelInit2D_NVC0(ScrnInfoPtr pScrn); + /* in nv50_exa.c */ Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel); void NV50EXASolid(PixmapPtr, int, int, int, int); @@ -142,6 +147,26 @@ void NV50EXADoneComposite(PixmapPtr); Bool NV50EXAUploadSIFC(const char *src, int src_pitch, PixmapPtr pdPix, int x, int y, int w, int h, int cpp); +/* in nvc0_exa.c */ +Bool NVC0AccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h, + const char *src, int src_pitch); +Bool NVC0AccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h, + char *dst, unsigned dst_pitch); + +Bool NVC0EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel); +void NVC0EXASolid(PixmapPtr, int, int, int, int); +void NVC0EXADoneSolid(PixmapPtr); +Bool NVC0EXAPrepareCopy(PixmapPtr, PixmapPtr, int, int, int, Pixel); +void NVC0EXACopy(PixmapPtr, int, int, int, int, int, int); +void NVC0EXADoneCopy(PixmapPtr); +Bool NVC0EXACheckComposite(int, PicturePtr, PicturePtr, PicturePtr); +Bool NVC0EXAPrepareComposite(int, PicturePtr, PicturePtr, PicturePtr, + PixmapPtr, PixmapPtr, PixmapPtr); +void NVC0EXAComposite(PixmapPtr, int, int, int, int, int, int, int, int); +void NVC0EXADoneComposite(PixmapPtr); +Bool NVC0EXAUploadSIFC(const char *src, int src_pitch, + PixmapPtr pdPix, int x, int y, int w, int h, int cpp); + /* nv50_xv.c */ int nv50_xv_image_put(ScrnInfoPtr, struct nouveau_bo *, int, int, int, int, BoxPtr, int, int, int, int, uint16_t, uint16_t, diff --git a/src/nv_type.h b/src/nv_type.h index 0bfe721..a6398a2 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -24,6 +24,11 @@ #define NV_ARCH_30 0x30 #define NV_ARCH_40 0x40 #define NV_ARCH_50 0x50 +#define NV_ARCH_C0 0xc0 + +#define NvSubM2MF 3 +#define NvSub2D 4 +#define NvSub3D 5 /* NV50 */ typedef struct _NVRec *NVPtr; diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c new file mode 100644 index 0000000..f2fe8ff --- /dev/null +++ b/src/nvc0_accel.c @@ -0,0 +1,758 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv_include.h" +#include "nvc0_accel.h" + +#define NOUVEAU_BO(a, b, m) (NOUVEAU_BO_##a | NOUVEAU_BO_##b | NOUVEAU_BO_##m) + +Bool +NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn) +{ + NVPtr pNv = NVPTR(pScrn); + struct nouveau_channel *chan = pNv->chan; + + pNv->NvMemFormat = (struct nouveau_grobj *)-2; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0_M2MF (9039)\n"); + + BEGIN_RING(chan, NvSubM2MF, 0x0000, 1); + OUT_RING (chan, 0x9039); + + /* XXX: Stupid interface, I want the notifier address ! */ + + FIRE_RING (chan); + + return TRUE; +} + +Bool +NVAccelInit2D_NVC0(ScrnInfoPtr pScrn) +{ + NVPtr pNv = NVPTR(pScrn); + struct nouveau_channel *chan = pNv->chan; + + pNv->Nv2D = (struct nouveau_grobj *)-2; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0_2D (902d)\n"); + + BEGIN_RING(chan, NvSub2D, 0x0000, 1); + OUT_RING (chan, 0x902d); + + BEGIN_RING(chan, NvSub2D, NV50_2D_CLIP_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub2D, NV50_2D_COLOR_KEY_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub2D, 0x0884, 1); + OUT_RING (chan, 0x3f); + BEGIN_RING(chan, NvSub2D, 0x0888, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub2D, NV50_2D_ROP, 1); + OUT_RING (chan, 0x55); + BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1); + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + + BEGIN_RING(chan, NvSub2D, NV50_2D_BLIT_DU_DX_FRACT, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_SHAPE, 2); + OUT_RING (chan, 4); + OUT_RING (chan, NVC0TCL_RT_FORMAT_R5G6B5_UNORM); + BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_FORMAT, 2); + OUT_RING (chan, 2); + OUT_RING (chan, 1); + + FIRE_RING (chan); + + pNv->currentRop = 0xfffffffa; + return TRUE; +} + +Bool +NVAccelInitNVC0TCL(ScrnInfoPtr pScrn) +{ + NVPtr pNv = NVPTR(pScrn); + struct nouveau_channel *chan = pNv->chan; + struct nouveau_bo *bo = pNv->tesla_scratch; + uint32_t tclClass; + int i; + + switch (pNv->dev->chipset) { + case 0xc0: + tclClass = 0x9097; + break; + default: + return FALSE; + } + + if (!pNv->Nv3D) { + int ret; + + ret = nouveau_notifier_alloc(chan, NvVBlankSem, 1, + &pNv->vblank_sem); + if (ret) + return FALSE; + + ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM, + (128 << 10), 0x20000, + &pNv->tesla_scratch); + bo = pNv->tesla_scratch; + if (!ret) + ret = nouveau_bo_pin(bo, NOUVEAU_BO_VRAM); + if (ret) { + nouveau_notifier_free(&pNv->vblank_sem); + return FALSE; + } + } + pNv->Nv3D = (struct nouveau_grobj *)-2; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0TCL (%x)\n", tclClass); + + if (MARK_RING(chan, 512, 32)) + return FALSE; + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_NOTIFY_ADDRESS_HIGH, 3); + OUT_RELOCh(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSub3D, 0x0000, 1); + OUT_RING (chan, tclClass); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_NOTIFY_ADDRESS_HIGH, 3); + OUT_RELOCh(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_COLOR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_ZETA_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_MODE, 1); + OUT_RING (chan, NVC0TCL_MULTISAMPLE_MODE_1X); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_COND_MODE, 1); + OUT_RING (chan, NVC0TCL_COND_MODE_ALWAYS); + BEGIN_RING(chan, NvSub3D, NVC0TCL_RT_CONTROL, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub3D, NVC0TCL_ZETA_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_CLIP_RECTS_EN, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_CLIPID_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, 0x0fac, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_COLOR_MASK(0), 8); + OUT_RING (chan, 0x1111); + for (i = 1; i < 8; ++i) + OUT_RING(chan, 0); + + FIRE_RING (chan); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_SCREEN_SCISSOR_HORIZ, 2); + OUT_RING (chan, (8192 << 16) | 0); + OUT_RING (chan, (8192 << 16) | 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_Y_ORIGIN_BOTTOM, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_WINDOW_OFFSET_X, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, 0x1590, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_LINKED_TSC, 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_TRANSFORM_EN, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEW_VOLUME_CLIP_CTRL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_DEPTH_RANGE_NEAR(0), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 1.0f); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_TEX_LIMITS(4), 1); + OUT_RING (chan, 0x54); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_ENABLE(0), 8); + OUT_RING (chan, 1); + for (i = 1; i < 8; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_INDEPENDENT, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSub3D, 0x17bc, 3); + OUT_RELOCh(chan, bo, MISC_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, MISC_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 1); + + BEGIN_RING(chan, NvSub3D, 0x3420, 2); + OUT_RING (chan, (bo->offset + CB_OFFSET) >> 8); + OUT_RING (chan, 16384); + + FIRE_RING (chan); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_ADDRESS_HIGH, 2); + OUT_RELOCh(chan, bo, CODE_OFFSET, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, CODE_OFFSET, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PVP_PASS, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PVP_PASS, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 7 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 7 * 2 + 20); + OUT_RING (chan, 0x00020461); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff000); + OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x000] */ + OUT_RING (chan, 0x0001033f); /* VP_ATTR_EN[0x080] */ + OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x100] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x200] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x300] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0033f000); /* VP_EXPORT_EN[0x040] */ + OUT_RING (chan, 0x00000000); /* VP_EXPORT_EN[0x0c0] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); /* VP_EXPORT_EN[0x2c0] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c66); + OUT_RING (chan, 0x06000080); /* vfetch { $r0,1,2,3 } b128 a[0x80] */ + OUT_RING (chan, 0xfff11c26); + OUT_RING (chan, 0x06000090); /* vfetch { $r4,5 } b64 a[0x90] */ + OUT_RING (chan, 0xfff19c26); + OUT_RING (chan, 0x060000a0); /* vfetch { $r6,7 } b64 a[0xa0] */ + OUT_RING (chan, 0x03f01c66); + OUT_RING (chan, 0x0a7e0070); /* export v[0x70] { $r0 $r1 $r2 $r3 } */ + OUT_RING (chan, 0x13f01c26); + OUT_RING (chan, 0x0a7e0080); /* export v[0x80] { $r4 $r5 } */ + OUT_RING (chan, 0x1bf01c26); + OUT_RING (chan, 0x0a7e0090); /* export v[0x90] { $r6 $r7 } */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(1), 2); + OUT_RING (chan, 0x11); + OUT_RING (chan, PVP_PASS); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_GPR_ALLOC(1), 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, NvSub3D, 0x163c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, 0x2600, 1); + OUT_RING (chan, 1); + + FIRE_RING (chan); usleep(500); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_S, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_S, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 6 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 6 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x0000000a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f05c40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f01c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc001e86); + OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_C, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_C, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 13 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x00000a0a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */ + OUT_RING (chan, 0xfc211e86); + OUT_RING (chan, 0x80120001); /* tex { _,_,_,$r4 } $t1 { $r2,3 } */ + OUT_RING (chan, 0x03f05c40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f01c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc001e86); + OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */ + OUT_RING (chan, 0x1030dc40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r4 */ + OUT_RING (chan, 0x10209c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r4 */ + OUT_RING (chan, 0x10105c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r4 */ + OUT_RING (chan, 0x10001c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_CCA, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_CCA, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 13 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20); + OUT_RING (chan, 0x00021462); /* 0x0000c000 = USES_KIL, MULTI_COLORS */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); /* FRAG_COORD_UMASK = 0x8 */ + OUT_RING (chan, 0x00000a0a); /* FP_INTERP[0x080], 0022 0022 */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x0c0], 0 = OFF */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x100], 1 = FLAT */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x140], 2 = PERSPECTIVE */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x180], 3 = LINEAR */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x1c0] */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x200] */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x240] */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x280] */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x2c0] */ + OUT_RING (chan, 0x00000000); /* FP_INTERP[0x300] */ + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); /* FP_RESULT_MASK (0x8000 Face ?) */ + OUT_RING (chan, 0x00000000); /* 0x2 = FragDepth, 0x1 = SampleMask */ + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */ + OUT_RING (chan, 0xfc211e86); + OUT_RING (chan, 0x8013c001); /* tex { $r4,5,6,7 } $t1 { $r2,3 } */ + OUT_RING (chan, 0x03f05c40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f01c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc001e86); + OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */ + OUT_RING (chan, 0x1c30dc40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r7 */ + OUT_RING (chan, 0x18209c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r6 */ + OUT_RING (chan, 0x14105c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r5 */ + OUT_RING (chan, 0x10001c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_CCASA, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_CCASA, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 13 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x00000a0a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc211e86); + OUT_RING (chan, 0x80120000); /* tex { _,_,_,$r4 } $t0 { $r2,3 } */ + OUT_RING (chan, 0x03f05c40); + OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r1 $r0 v[$r63+0x94] */ + OUT_RING (chan, 0x03f01c40); + OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r0 $r0 v[$r63+0x90] */ + OUT_RING (chan, 0xfc001e86); + OUT_RING (chan, 0x8013c001); /* tex { $r0,1,2,3 } $t1 { $r0,1 } */ + OUT_RING (chan, 0x1030dc40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r4 */ + OUT_RING (chan, 0x10209c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r4 */ + OUT_RING (chan, 0x10105c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r4 */ + OUT_RING (chan, 0x10001c40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_S_A8, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_S_A8, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 9 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 9 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x0000000a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f05c40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f01c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc001e86); + OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r0 } $t0 { $r0 $r1 } */ + OUT_RING (chan, 0x0000dde4); + OUT_RING (chan, 0x28000000); /* mov b32 $r3 $r0 */ + OUT_RING (chan, 0x00009de4); + OUT_RING (chan, 0x28000000); /* mov b32 $r2 $r0 */ + OUT_RING (chan, 0x00005de4); + OUT_RING (chan, 0x28000000); /* mov b32 $r1 $r0 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_C_A8, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_C_A8, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 13 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x00000a0a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */ + OUT_RING (chan, 0xfc205e86); + OUT_RING (chan, 0x80120001); /* tex { _ _ _ $r1 } $t1 { $r2 $r3 } */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc201e86); + OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r0 } $t0 { $r2 $r3 } */ + OUT_RING (chan, 0x0400dc40); + OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r0 $r1 */ + OUT_RING (chan, 0x0c009de4); + OUT_RING (chan, 0x28000000); /* mov b32 $r2 $r3 */ + OUT_RING (chan, 0x0c005de4); + OUT_RING (chan, 0x28000000); /* mov b32 $r1 $r3 */ + OUT_RING (chan, 0x0c001de4); + OUT_RING (chan, 0x28000000); /* mov b32 $r0 $r3 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + FIRE_RING (chan); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, bo, PFP_NV12, NOUVEAU_BO(VRAM, VRAM, WR)) || + OUT_RELOCl(chan, bo, PFP_NV12, NOUVEAU_BO(VRAM, VRAM, WR))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 16 * 8 + 20 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 16 * 2 + 20); + OUT_RING (chan, 0x00021462); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x80000000); + OUT_RING (chan, 0x00000a0a); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0000000f); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0xfff01c00); + OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */ + OUT_RING (chan, 0x10001c00); + OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */ + OUT_RING (chan, 0xfc205e86); + OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r1 } $t0 { $r2 $r3 } */ + OUT_RING (chan, 0x03f0dc40); + OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */ + OUT_RING (chan, 0x03f09c40); + OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */ + OUT_RING (chan, 0xfc211e86); + OUT_RING (chan, 0x80130001); /* tex { _ _ $r4 $r5 } $t1 { $r2 $r3 } */ + OUT_RING (chan, 0x28101c42); + OUT_RING (chan, 0x30fc7757); /* mul ftz f32 $r0 $r1 0.616543 */ + OUT_RING (chan, 0x08109c42); + OUT_RING (chan, 0x32fe8493); /* mul ftz f32 $r2 $r1 -1.258934 */ + OUT_RING (chan, 0xec10dc42); + OUT_RING (chan, 0x32fe0704); /* mul ftz f32 $r3 $r1 -1.013709 */ + OUT_RING (chan, 0x00405c40); + OUT_RING (chan, 0x30004000); /* add ftz f32 $r1 mul $r4 c0[0] $r0 */ + OUT_RING (chan, 0x10409c40); + OUT_RING (chan, 0x30044000); /* add ftz f32 $r2 mul $r4 c0[0x4] $r2 */ + OUT_RING (chan, 0x30501c40); + OUT_RING (chan, 0x30064000); /* add ftz f32 $r0 mul $r5 c0[0xc] $r3 */ + OUT_RING (chan, 0x20505c40); + OUT_RING (chan, 0x30024000); /* add ftz f32 $r1 mul $r5 c0[0x8] $r1 */ + OUT_RING (chan, 0x00001de7); + OUT_RING (chan, 0x80000000); /* exit */ + + BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_SIZE, 3); + OUT_RING (chan, 256); + if (OUT_RELOCh(chan, bo, CB_OFFSET, NOUVEAU_BO(VRAM, VRAM, RD)) || + OUT_RELOCl(chan, bo, CB_OFFSET, NOUVEAU_BO(VRAM, VRAM, RD))) { + MARK_UNDO(chan); + return FALSE; + } + BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_POS, 5); + OUT_RING (chan, 0); + OUT_RINGf (chan, -0.391730f); + OUT_RINGf (chan, 2.017000f); + OUT_RINGf (chan, -0.812900f); + OUT_RINGf (chan, 1.595800f); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_FLUSH, 1); + OUT_RING (chan, 0x1111); + + FIRE_RING (chan); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(5), 2); + OUT_RING (chan, 0x51); + OUT_RING (chan, PFP_S); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_GPR_ALLOC(5), 1); + OUT_RING (chan, 8); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_BIND(4), 1); + OUT_RING (chan, 0x01); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_EARLY_FRAGMENT_TESTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, 0x0360, 2); + OUT_RING (chan, 0x20164010); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, NvSub3D, 0x196c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, 0x1664, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_FRAG_COLOR_CLAMP_EN, 1); + OUT_RING (chan, 0x11111111); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_RASTERIZE_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(4), 1); + OUT_RING (chan, 0x40); + BEGIN_RING(chan, NvSub3D, NVC0TCL_GP_BUILTIN_RESULT_EN, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(3), 1); + OUT_RING (chan, 0x30); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(2), 1); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(0), 1); + OUT_RING (chan, 0x00); + + BEGIN_RING(chan, NvSub3D, 0x1604, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, NvSub3D, NVC0TCL_POINT_SPRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_ENABLE(0), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_HORIZ(0), 2); + OUT_RING (chan, (8192 << 16) | 0); + OUT_RING (chan, (8192 << 16) | 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_HORIZ(0), 2); + OUT_RING (chan, (8192 << 16) | 0); + OUT_RING (chan, (8192 << 16) | 0); + + FIRE_RING (chan); usleep(50); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "NVC0TCL has been initialized\n"); + + return TRUE; +} + diff --git a/src/nvc0_accel.h b/src/nvc0_accel.h new file mode 100644 index 0000000..1d72695 --- /dev/null +++ b/src/nvc0_accel.h @@ -0,0 +1,83 @@ +#ifndef __NVC0_ACCEL_H__ +#define __NVC0_ACCEL_H__ + +#define BEGIN_RING(c, g, m, s) BEGIN_RING_NVC0(c, g, m, s) +#define BEGIN_RING_NI(c, g, m, s) BEGIN_RING_NI_NVC0(c, g, m, s) + +/* scratch buffer offsets */ +#define CODE_OFFSET 0x00000000 /* Code */ +#define TIC_OFFSET 0x00002000 /* Texture Image Control */ +#define TSC_OFFSET 0x00003000 /* Texture Sampler Control */ + +#define NTFY_OFST 0x08000 +#define MISC_OFST 0x10000 + +/* fragment programs */ +#define PFP_S 0x0000 /* (src) */ +#define PFP_C 0x0100 /* (src IN mask) */ +#define PFP_CCA 0x0200 /* (src IN mask) component-alpha */ +#define PFP_CCASA 0x0300 /* (src IN mask) component-alpha src-alpha */ +#define PFP_S_A8 0x0400 /* (src) a8 rt */ +#define PFP_C_A8 0x0500 /* (src IN mask) a8 rt - same for CA and CA_SA */ +#define PFP_NV12 0x0600 /* NV12 YUV->RGB */ + +/* vertex programs */ +#define PVP_PASS 0x0700 /* vertex pass-through shader */ + +/* shader constants */ +#define CB_OFFSET 0x1000 + +#define VTX_ATTR(a, c, t, s) \ + ((NVC0TCL_VTX_ATTR_DEFINE_TYPE_##t) | \ + ((a) << NVC0TCL_VTX_ATTR_DEFINE_ATTR_SHIFT) | \ + ((c) << NVC0TCL_VTX_ATTR_DEFINE_COMP_SHIFT) | \ + ((s) << NVC0TCL_VTX_ATTR_DEFINE_SIZE_SHIFT)) + +static __inline__ void +VTX1s(NVPtr pNv, float sx, float sy, unsigned dx, unsigned dy) +{ + struct nouveau_channel *chan = pNv->chan; + + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3); + OUT_RING (chan, VTX_ATTR(1, 2, FLOAT, 4)); + OUT_RINGf (chan, sx); + OUT_RINGf (chan, sy); +#if 1 + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 2); + OUT_RING (chan, VTX_ATTR(0, 2, USCALED, 2)); + OUT_RING (chan, (dy << 16) | dx); +#else + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3); + OUT_RING (chan, VTX_ATTR(0, 2, FLOAT, 4)); + OUT_RINGf (chan, (float)dx); + OUT_RINGf (chan, (float)dy); +#endif +} + +static __inline__ void +VTX2s(NVPtr pNv, float s1x, float s1y, float s2x, float s2y, + unsigned dx, unsigned dy) +{ + struct nouveau_channel *chan = pNv->chan; + + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3); + OUT_RING (chan, VTX_ATTR(1, 2, FLOAT, 4)); + OUT_RINGf (chan, s1x); + OUT_RINGf (chan, s1y); + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3); + OUT_RING (chan, VTX_ATTR(2, 2, FLOAT, 4)); + OUT_RINGf (chan, s2x); + OUT_RINGf (chan, s2y); +#if 1 + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 2); + OUT_RING (chan, VTX_ATTR(0, 2, USCALED, 2)); + OUT_RING (chan, (dy << 16) | dx); +#else + BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3); + OUT_RING (chan, VTX_ATTR(0, 2, FLOAT, 4)); + OUT_RINGf (chan, (float)dx); + OUT_RINGf (chan, (float)dy); +#endif +} + +#endif diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c new file mode 100644 index 0000000..91c0743 --- /dev/null +++ b/src/nvc0_exa.c @@ -0,0 +1,1239 @@ +/* + * Copyright 2007 NVIDIA, Corporation + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv_include.h" +#include "nv_rop.h" +#include "nvc0_accel.h" +#include "nv50_texture.h" + +#define NOUVEAU_BO(a, b, c) (NOUVEAU_BO_##a | NOUVEAU_BO_##b | NOUVEAU_BO_##c) + +Bool +NVC0AccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h, + char *dst, unsigned dst_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pspix->drawable.pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); + struct nouveau_channel *chan = pNv->chan; + struct nouveau_bo *bo = nouveau_pixmap_bo(pspix); + const int cpp = pspix->drawable.bitsPerPixel / 8; + const int line_len = w * cpp; + const int line_limit = (128 << 10) / line_len; + unsigned src_offset = 0, src_pitch = 0, tiled = 1; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "NVC0AccelDownloadM2MF: %i.%i %ix%i\n", + x, y, w, h); + + if (!nv50_style_tiled_pixmap(pspix)) { + tiled = 0; + src_pitch = exaGetPixmapPitch(pspix); + src_offset = (y * src_pitch) + (x * cpp); + } else { + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_TILING_MODE_IN, 5); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, pspix->drawable.width * cpp); + OUT_RING (chan, pspix->drawable.height); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } + + while (h) { + const char *src; + int line_count, i; + + /* GART size >= 128 KiB assumed */ + line_count = h; + if (line_count > line_limit) + line_count = line_limit; + + MARK_RING(chan, 16, 4); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + OUT_RELOCh(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, WR)); + OUT_RELOCl(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, WR)); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_IN_HIGH, 6); + OUT_RELOCh(chan, bo, src_offset, NOUVEAU_BO(VRAM, GART, RD)); + OUT_RELOCl(chan, bo, src_offset, NOUVEAU_BO(VRAM, GART, RD)); + OUT_RING (chan, src_pitch); + OUT_RING (chan, line_len); + OUT_RING (chan, line_len); + OUT_RING (chan, line_count); + + if (tiled) { + BEGIN_RING(chan, NvSubM2MF, + NVC0_M2MF_TILING_POSITION_IN_X, 2); + OUT_RING (chan, x * cpp); + OUT_RING (chan, y); + } + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100000 | (tiled << 8)); + + if (nouveau_bo_map(pNv->GART, NOUVEAU_BO_RD)) { + MARK_UNDO(chan); + return FALSE; + } + src = pNv->GART->map; + + if (dst_pitch == line_len) { + memcpy(dst, src, dst_pitch * line_count); + } else { + for (i = 0; i < line_count; ++i) { + memcpy(dst, src, line_len); + src += line_len; + dst += dst_pitch; + } + } + nouveau_bo_unmap(pNv->GART); + + if (!tiled) + src_offset += line_count * src_pitch; + h -= line_count; + y += line_count; + } + + return TRUE; +} + +Bool +NVC0AccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h, + const char *src, int src_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pdpix->drawable.pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); + struct nouveau_channel *chan = pNv->chan; + struct nouveau_bo *bo = nouveau_pixmap_bo(pdpix); + int cpp = pdpix->drawable.bitsPerPixel / 8; + int line_len = w * cpp; + int line_limit = (128 << 10) / line_len; + unsigned dst_offset = 0, dst_pitch = 0, tiled = 1; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "NVC0AccelUploadM2MF: %i.%i %ix%i\n", + x, y, w, h); + + if (!nv50_style_tiled_pixmap(pdpix)) { + tiled = 0; + dst_pitch = exaGetPixmapPitch(pdpix); + dst_offset = (y * dst_pitch) + (x * cpp); + } else { + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_TILING_MODE_OUT, 5); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, pdpix->drawable.width * cpp); + OUT_RING (chan, pdpix->drawable.height); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } + + while (h) { + char *dst; + int i, line_count; + + line_count = h; + if (line_count > line_limit) + line_count = line_limit; + + if (nouveau_bo_map(pNv->GART, NOUVEAU_BO_WR)) + return FALSE; + dst = pNv->GART->map; + + if (src_pitch == line_len) { + memcpy(dst, src, src_pitch * line_count); + src += src_pitch * line_count; + } else { + for (i = 0; i < line_count; i++) { + memcpy(dst, src, line_len); + src += src_pitch; + dst += line_len; + } + } + nouveau_bo_unmap(pNv->GART); + + if (MARK_RING(chan, 16, 4)) + return FALSE; + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_IN_HIGH, 2); + OUT_RELOCh(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, RD)); + OUT_RELOCl(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, RD)); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + OUT_RELOCh(chan, bo, dst_offset, NOUVEAU_BO(VRAM, GART, WR)); + OUT_RELOCl(chan, bo, dst_offset, NOUVEAU_BO(VRAM, GART, WR)); + + if (tiled) { + BEGIN_RING(chan, NvSubM2MF, + NVC0_M2MF_TILING_POSITION_OUT_X, 2); + OUT_RING (chan, x * cpp); + OUT_RING (chan, y); + } + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_PITCH_IN, 4); + OUT_RING (chan, line_len); + OUT_RING (chan, dst_pitch); + OUT_RING (chan, line_len); + OUT_RING (chan, line_count); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100000 | (tiled << 4)); + FIRE_RING (chan); + + if (!tiled) + dst_offset += line_count * dst_pitch; + h -= line_count; + y += line_count; + } + + return TRUE; +} + + +struct nvc0_exa_state { + struct { + PictTransformPtr transform; + float width; + float height; + } unit[2]; + + Bool have_mask; +}; + +static struct nvc0_exa_state exa_state; + +#define NVC0EXA_LOCALS(p) \ + ScrnInfoPtr pScrn = xf86Screens[(p)->drawable.pScreen->myNum]; \ + NVPtr pNv = NVPTR(pScrn); \ + struct nouveau_channel *chan = pNv->chan; (void)chan; \ + struct nvc0_exa_state *state = &exa_state; (void)state + +#define BF(f) NVC0TCL_BLEND_FUNC_SRC_RGB_##f + +struct nvc0_blend_op { + unsigned src_alpha; + unsigned dst_alpha; + unsigned src_blend; + unsigned dst_blend; +}; + +static struct nvc0_blend_op +NVC0EXABlendOp[] = { +/* Clear */ { 0, 0, BF( ZERO), BF( ZERO) }, +/* Src */ { 0, 0, BF( ONE), BF( ZERO) }, +/* Dst */ { 0, 0, BF( ZERO), BF( ONE) }, +/* Over */ { 1, 0, BF( ONE), BF(ONE_MINUS_SRC_ALPHA) }, +/* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ONE) }, +/* In */ { 0, 1, BF( DST_ALPHA), BF( ZERO) }, +/* InReverse */ { 1, 0, BF( ZERO), BF( SRC_ALPHA) }, +/* Out */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ZERO) }, +/* OutReverse */ { 1, 0, BF( ZERO), BF(ONE_MINUS_SRC_ALPHA) }, +/* Atop */ { 1, 1, BF( DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) }, +/* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF( SRC_ALPHA) }, +/* Xor */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) }, +/* Add */ { 0, 0, BF( ONE), BF( ONE) }, +}; + +static Bool +NVC0EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt) +{ + NVC0EXA_LOCALS(ppix); + + switch (ppix->drawable.bitsPerPixel) { + case 8 : *fmt = NV50_2D_SRC_FORMAT_R8_UNORM; break; + case 15: *fmt = NV50_2D_SRC_FORMAT_X1R5G5B5_UNORM; break; + case 16: *fmt = NV50_2D_SRC_FORMAT_R5G6B5_UNORM; break; + case 24: *fmt = NV50_2D_SRC_FORMAT_X8R8G8B8_UNORM; break; + case 30: *fmt = NV50_2D_SRC_FORMAT_A2B10G10R10_UNORM; break; + case 32: *fmt = NV50_2D_SRC_FORMAT_A8R8G8B8_UNORM; break; + default: + NOUVEAU_FALLBACK("Unknown surface format for bpp=%d\n", + ppix->drawable.bitsPerPixel); + return FALSE; + } + + return TRUE; +} + +static void NVC0EXASetClip(PixmapPtr ppix, int x, int y, int w, int h) +{ + NVC0EXA_LOCALS(ppix); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASetClip: %i.%i %ix%i\n", + x, y, w, h); + + BEGIN_RING(chan, NvSub2D, NV50_2D_CLIP_X, 4); + OUT_RING (chan, x); + OUT_RING (chan, y); + OUT_RING (chan, w); + OUT_RING (chan, h); +} + +static Bool +NVC0EXAAcquireSurface2D(PixmapPtr ppix, int is_src) +{ + NVC0EXA_LOCALS(ppix); + struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); + int mthd = is_src ? NV50_2D_SRC_FORMAT : NV50_2D_DST_FORMAT; + uint32_t fmt, bo_flags; + + if (!NVC0EXA2DSurfaceFormat(ppix, &fmt)) + return FALSE; + + bo_flags = NOUVEAU_BO_VRAM; + bo_flags |= is_src ? NOUVEAU_BO_RD : NOUVEAU_BO_WR; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "EXAAcquSurf2D: is_src=%i, tile_flags=%x, size=%ix%i\n", + is_src, bo->tile_flags, + ppix->drawable.width, ppix->drawable.height); + + if (!nv50_style_tiled_pixmap(ppix)) { + BEGIN_RING(chan, NvSub2D, mthd, 2); + OUT_RING (chan, fmt); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub2D, mthd + 0x14, 1); + OUT_RING (chan, (uint32_t)exaGetPixmapPitch(ppix)); + } else { + BEGIN_RING(chan, NvSub2D, mthd, 5); + OUT_RING (chan, fmt); + OUT_RING (chan, 0); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } + + BEGIN_RING(chan, NvSub2D, mthd + 0x18, 4); + OUT_RING (chan, ppix->drawable.width); + OUT_RING (chan, ppix->drawable.height); + if (OUT_RELOCh(chan, bo, 0, bo_flags) || + OUT_RELOCl(chan, bo, 0, bo_flags)) + return FALSE; + + if (is_src == 0) + NVC0EXASetClip(ppix, 0, 0, ppix->drawable.width, ppix->drawable.height); + + return TRUE; +} + +static void +NVC0EXASetPattern(PixmapPtr pdpix, int col0, int col1, int pat0, int pat1) +{ + NVC0EXA_LOCALS(pdpix); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "EXASetPattern: %i %i, %i %i\n", + col0, col1, pat0, pat1); + + BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_COLOR(0), 4); + OUT_RING (chan, col0); + OUT_RING (chan, col1); + OUT_RING (chan, pat0); + OUT_RING (chan, pat1); +} + +static void +NVC0EXASetROP(PixmapPtr pdpix, int alu, Pixel planemask) +{ + NVC0EXA_LOCALS(pdpix); + int rop; + + if (planemask != ~0) + rop = NVROP[alu].copy_planemask; + else + rop = NVROP[alu].copy; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASetROP\n"); + + BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1); + if (alu == GXcopy && EXA_PM_IS_SOLID(&pdpix->drawable, planemask)) { + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + return; + } else { + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY_PREMULT); + } + + BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_FORMAT, 2); + switch (pdpix->drawable.bitsPerPixel) { + case 8: OUT_RING (chan, 3); break; + case 15: OUT_RING (chan, 1); break; + case 16: OUT_RING (chan, 0); break; + case 24: + case 32: + default: + OUT_RING (chan, 2); + break; + } + OUT_RING (chan, 1); + + /* There are 16 ALUs. + * 0-15: copy + * 16-31: copy_planemask + */ + + if (!EXA_PM_IS_SOLID(&pdpix->drawable, planemask)) { + alu += 16; + NVC0EXASetPattern(pdpix, 0, planemask, ~0, ~0); + } else { + if (pNv->currentRop > 15) + NVC0EXASetPattern(pdpix, ~0, ~0, ~0, ~0); + } + + if (pNv->currentRop != alu) { + BEGIN_RING(chan, NvSub2D, NV50_2D_ROP, 1); + OUT_RING (chan, rop); + pNv->currentRop = alu; + } +} + +static void +NVC0EXAStateSolidResubmit(struct nouveau_channel *chan) +{ + ScrnInfoPtr pScrn = chan->user_private; + NVPtr pNv = NVPTR(pScrn); + + NVC0EXAPrepareSolid(pNv->pdpix, pNv->alu, pNv->planemask, + pNv->fg_colour); +} + +Bool +NVC0EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg) +{ + NVC0EXA_LOCALS(pdpix); + uint32_t fmt; + + if (!NVC0EXA2DSurfaceFormat(pdpix, &fmt)) + NOUVEAU_FALLBACK("rect format\n"); + + if (MARK_RING(chan, 64, 4)) + NOUVEAU_FALLBACK("ring space\n"); + + if (!NVC0EXAAcquireSurface2D(pdpix, 0)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("dest pixmap\n"); + } + + NVC0EXASetROP(pdpix, alu, planemask); + + BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); + OUT_RING (chan, fmt); + OUT_RING (chan, fg); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAPrepareSolid: fmt %x\n", fmt); + + pNv->pdpix = pdpix; + pNv->alu = alu; + pNv->planemask = planemask; + pNv->fg_colour = fg; + chan->flush_notify = NVC0EXAStateSolidResubmit; + return TRUE; +} + +void +NVC0EXASolid(PixmapPtr pdpix, int x1, int y1, int x2, int y2) +{ + NVC0EXA_LOCALS(pdpix); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASolid: %i %i %i %i\n", x1, y1, x2, y2); + + WAIT_RING (chan, 5); + BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_POINT32_X(0), 4); + OUT_RING (chan, x1); + OUT_RING (chan, y1); + OUT_RING (chan, x2); + OUT_RING (chan, y2); + + if ((x2 - x1) * (y2 - y1) >= 512) + FIRE_RING (chan); +} + +void +NVC0EXADoneSolid(PixmapPtr pdpix) +{ + NVC0EXA_LOCALS(pdpix); + + chan->flush_notify = NULL; +} + +static void +NVC0EXAStateCopyResubmit(struct nouveau_channel *chan) +{ + ScrnInfoPtr pScrn = chan->user_private; + NVPtr pNv = NVPTR(pScrn); + + NVC0EXAPrepareCopy(pNv->pspix, pNv->pdpix, 0, 0, pNv->alu, + pNv->planemask); +} + +Bool +NVC0EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy, + int alu, Pixel planemask) +{ + NVC0EXA_LOCALS(pdpix); + + if (MARK_RING(chan, 64, 4)) + NOUVEAU_FALLBACK("ring space\n"); + + if (!NVC0EXAAcquireSurface2D(pspix, 1)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("src pixmap\n"); + } + + if (!NVC0EXAAcquireSurface2D(pdpix, 0)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("dest pixmap\n"); + } + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAPrepareCopy"); + + NVC0EXASetROP(pdpix, alu, planemask); + + pNv->pspix = pspix; + pNv->pdpix = pdpix; + pNv->alu = alu; + pNv->planemask = planemask; + chan->flush_notify = NVC0EXAStateCopyResubmit; + return TRUE; +} + +void +NVC0EXACopy(PixmapPtr pdpix, int srcX , int srcY, + int dstX , int dstY, + int width, int height) +{ + NVC0EXA_LOCALS(pdpix); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXACopy: %i.%i -> %i.%i, %ix%i\n", + srcX, srcY, dstX, dstY, width, height); + + WAIT_RING (chan, 17); + BEGIN_RING(chan, NvSub2D, NV50_2D_SERIALIZE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub2D, 0x088c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub2D, NV50_2D_BLIT_DST_X, 12); + OUT_RING (chan, dstX); + OUT_RING (chan, dstY); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RING (chan, 0); /* DU,V_DX,Y_FRACT,INT */ + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); /* BLIT_SRC_X,Y_FRACT,INT */ + OUT_RING (chan, srcX); + OUT_RING (chan, 0); + OUT_RING (chan, srcY); + + if (width * height >= 512) + FIRE_RING (chan); +} + +void +NVC0EXADoneCopy(PixmapPtr pdpix) +{ + NVC0EXA_LOCALS(pdpix); + + chan->flush_notify = NULL; +} + +static void +NVC0EXAStateSIFCResubmit(struct nouveau_channel *chan) +{ + ScrnInfoPtr pScrn = chan->user_private; + NVPtr pNv = NVPTR(pScrn); + + if (MARK_RING(pNv->chan, 32, 2)) + return; + + if (NVC0EXAAcquireSurface2D(pNv->pdpix, 0)) + MARK_UNDO(pNv->chan); +} + +Bool +NVC0EXAUploadSIFC(const char *src, int src_pitch, + PixmapPtr pdpix, int x, int y, int w, int h, int cpp) +{ + NVC0EXA_LOCALS(pdpix); + int line_dwords = (w * cpp + 3) / 4; + uint32_t sifc_fmt; + + if (!NVC0EXA2DSurfaceFormat(pdpix, &sifc_fmt)) + NOUVEAU_FALLBACK("hostdata format\n"); + + if (MARK_RING(chan, 64, 2)) + return FALSE; + + if (!NVC0EXAAcquireSurface2D(pdpix, 0)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("dest pixmap\n"); + } + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAUploadSIFC\n"); + + /* If the pitch isn't aligned to a dword you can + * get corruption at the end of a line. + */ + NVC0EXASetClip(pdpix, x, y, w, h); + + BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1); + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, NvSub2D, NV50_2D_SIFC_BITMAP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, sifc_fmt); + BEGIN_RING(chan, NvSub2D, NV50_2D_SIFC_WIDTH, 10); + OUT_RING (chan, (line_dwords * 4) / cpp); + OUT_RING (chan, h); + OUT_RING (chan, 0); /* SIFC_DX,Y_DU,V_FRACT,INT */ + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); /* SIFC_DST_X,Y_FRACT,INT */ + OUT_RING (chan, x); + OUT_RING (chan, 0); + OUT_RING (chan, y); + + pNv->pdpix = pdpix; + chan->flush_notify = NVC0EXAStateSIFCResubmit; + + while (h--) { + const char *ptr = src; + int count = line_dwords; + + while (count) { + int size = count > 1792 ? 1792 : count; + + WAIT_RING (chan, size + 1); + BEGIN_RING_NI(chan, NvSub2D, NV50_2D_SIFC_DATA, size); + OUT_RINGp (chan, ptr, size); + + ptr += size * 4; + count -= size; + } + + src += src_pitch; + } + + chan->flush_notify = NULL; + return TRUE; +} + +static Bool +NVC0EXACheckRenderTarget(PicturePtr ppict) +{ + if (ppict->pDrawable->width > 8192 || + ppict->pDrawable->height > 8192) + NOUVEAU_FALLBACK("render target dimensions exceeded %dx%d\n", + ppict->pDrawable->width, + ppict->pDrawable->height); + + switch (ppict->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a8: + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + case PICT_x8b8g8r8: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + break; + default: + NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format); + } + + return TRUE; +} + +static Bool +NVC0EXARenderTarget(PixmapPtr ppix, PicturePtr ppict) +{ + NVC0EXA_LOCALS(ppix); + struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); + unsigned format; + + /*XXX: Scanout buffer not tiled, someone needs to figure it out */ + if (!nv50_style_tiled_pixmap(ppix)) + NOUVEAU_FALLBACK("pixmap is scanout buffer\n"); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXARenderTarget\n"); + + switch (ppict->format) { + case PICT_a8r8g8b8: format = NVC0TCL_RT_FORMAT_A8R8G8B8_UNORM; break; + case PICT_x8r8g8b8: format = NVC0TCL_RT_FORMAT_X8R8G8B8_UNORM; break; + case PICT_r5g6b5: format = NVC0TCL_RT_FORMAT_R5G6B5_UNORM; break; + case PICT_a8: format = NVC0TCL_RT_FORMAT_A8_UNORM; break; + case PICT_x1r5g5b5: format = NVC0TCL_RT_FORMAT_X1R5G5B5_UNORM; break; + case PICT_a1r5g5b5: format = NVC0TCL_RT_FORMAT_A1R5G5B5_UNORM; break; + case PICT_x8b8g8r8: format = NVC0TCL_RT_FORMAT_X8B8G8R8_UNORM; break; + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + format = NVC0TCL_RT_FORMAT_A2B10G10R10_UNORM; + break; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + format = NVC0TCL_RT_FORMAT_A2R10G10B10_UNORM; + break; + default: + NOUVEAU_FALLBACK("invalid picture format\n"); + } + + BEGIN_RING(chan, NvSub3D, NVC0TCL_RT_ADDRESS_HIGH(0), 8); + if (OUT_RELOCh(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR) || + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR)) + return FALSE; + OUT_RING (chan, ppix->drawable.width); + OUT_RING (chan, ppix->drawable.height); + OUT_RING (chan, format); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, 0x00000001); + OUT_RING (chan, 0x00000000); + + return TRUE; +} + +static Bool +NVC0EXACheckTexture(PicturePtr ppict, PicturePtr pdpict, int op) +{ + if (!ppict->pDrawable) + NOUVEAU_FALLBACK("Solid and gradient pictures unsupported.\n"); + + if (ppict->pDrawable->width > 8192 || + ppict->pDrawable->height > 8192) + NOUVEAU_FALLBACK("texture dimensions exceeded %dx%d\n", + ppict->pDrawable->width, + ppict->pDrawable->height); + + switch (ppict->format) { + case PICT_a8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8r8g8b8: + case PICT_x8b8g8r8: + case PICT_r5g6b5: + case PICT_a8: + case PICT_x1r5g5b5: + case PICT_x1b5g5r5: + case PICT_a1r5g5b5: + case PICT_a1b5g5r5: + case PICT_b5g6r5: + case PICT_b8g8r8a8: + case PICT_b8g8r8x8: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + case PICT_x2r10g10b10: + case PICT_a2r10g10b10: + case PICT_x4r4g4b4: + case PICT_x4b4g4r4: + case PICT_a4r4g4b4: + case PICT_a4b4g4r4: + break; + default: + NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format); + } + + switch (ppict->filter) { + case PictFilterNearest: + case PictFilterBilinear: + break; + default: + NOUVEAU_FALLBACK("picture filter %d\n", ppict->filter); + } + + /* OpenGL and Render disagree on what should be sampled outside an XRGB + * texture (with no repeating). Opengl has a hardcoded alpha value of + * 1.0, while render expects 0.0. We assume that clipping is done for + * untranformed sources. + */ + if (NVC0EXABlendOp[op].src_alpha && !ppict->repeat && + ppict->transform && (PICT_FORMAT_A(ppict->format) == 0) + && (PICT_FORMAT_A(pdpict->format) != 0)) + NOUVEAU_FALLBACK("REPEAT_NONE unsupported for XRGB source\n"); + + return TRUE; +} + +#define _(X1, X2, X3, X4, FMT) \ + (NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_TYPEG_UNORM | \ + NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_TYPEA_UNORM | \ + NV50TIC_0_0_MAP##X1 | NV50TIC_0_0_MAP##X2 | \ + NV50TIC_0_0_MAP##X3 | NV50TIC_0_0_MAP##X4 | \ + NV50TIC_0_0_FMT_##FMT) + +static Bool +NVC0EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit) +{ + NVC0EXA_LOCALS(ppix); + struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); + const unsigned tcb_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + uint32_t mode; + + /* XXX: maybe add support for linear textures at some point */ + if (!nv50_style_tiled_pixmap(ppix)) + NOUVEAU_FALLBACK("pixmap is scanout buffer\n"); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXATexture\n"); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_TIC_ADDRESS_HIGH, 3); + if (OUT_RELOCh(chan, pNv->tesla_scratch, TIC_OFFSET, tcb_flags) || + OUT_RELOCl(chan, pNv->tesla_scratch, TIC_OFFSET, tcb_flags)) + return FALSE; + OUT_RING (chan, 15); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, pNv->tesla_scratch, + TIC_OFFSET + unit * 32, tcb_flags) || + OUT_RELOCl(chan, pNv->tesla_scratch, + TIC_OFFSET + unit * 32, tcb_flags)) + return FALSE; + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 8 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 8); + + switch (ppict->format) { + case PICT_a8r8g8b8: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 8_8_8_8)); + break; + case PICT_a8b8g8r8: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 8_8_8_8)); + break; + case PICT_x8r8g8b8: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 8_8_8_8)); + break; + case PICT_x8b8g8r8: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 8_8_8_8)); + break; + case PICT_r5g6b5: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 5_6_5)); + break; + case PICT_a8: + OUT_RING(chan, _(A_C0, B_ZERO, G_ZERO, R_ZERO, 8)); + break; + case PICT_x1r5g5b5: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 1_5_5_5)); + break; + case PICT_x1b5g5r5: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 1_5_5_5)); + break; + case PICT_a1r5g5b5: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 1_5_5_5)); + break; + case PICT_a1b5g5r5: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 1_5_5_5)); + break; + case PICT_b5g6r5: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 5_6_5)); + break; + case PICT_b8g8r8x8: + OUT_RING(chan, _(A_ONE, R_C1, G_C2, B_C3, 8_8_8_8)); + break; + case PICT_b8g8r8a8: + OUT_RING(chan, _(A_C0, R_C1, G_C2, B_C3, 8_8_8_8)); + break; + case PICT_a2b10g10r10: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 2_10_10_10)); + break; + case PICT_x2b10g10r10: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 2_10_10_10)); + break; + case PICT_x2r10g10b10: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 2_10_10_10)); + break; + case PICT_a2r10g10b10: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 2_10_10_10)); + break; + case PICT_x4r4g4b4: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 4_4_4_4)); + break; + case PICT_x4b4g4r4: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 4_4_4_4)); + break; + case PICT_a4r4g4b4: + OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 4_4_4_4)); + break; + case PICT_a4b4g4r4: + OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 4_4_4_4)); + break; + default: + NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n"); + } +#undef _ + + mode = 0xd0005000 | (bo->tile_mode << 22); + if (OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD) || + OUT_RELOCd(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, mode, mode)) + return FALSE; + OUT_RING (chan, 0x00300000); + OUT_RING (chan, (1 << 31) | ppix->drawable.width); + OUT_RING (chan, (1 << 16) | ppix->drawable.height); + OUT_RING (chan, 0x03000000); + OUT_RING (chan, 0x00000000); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_TSC_ADDRESS_HIGH, 3); + if (OUT_RELOCh(chan, pNv->tesla_scratch, TSC_OFFSET, tcb_flags) || + OUT_RELOCl(chan, pNv->tesla_scratch, TSC_OFFSET, tcb_flags)) + return FALSE; + OUT_RING (chan, 0); + + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2); + if (OUT_RELOCh(chan, pNv->tesla_scratch, + TSC_OFFSET + unit * 32, tcb_flags) || + OUT_RELOCl(chan, pNv->tesla_scratch, + TSC_OFFSET + unit * 32, tcb_flags)) + return FALSE; + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2); + OUT_RING (chan, 8 * 4); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 8); + + if (ppict->repeat) { + switch (ppict->repeatType) { + case RepeatPad: + OUT_RING (chan, 0x00024000 | + NV50TSC_1_0_WRAPS_CLAMP | + NV50TSC_1_0_WRAPT_CLAMP | + NV50TSC_1_0_WRAPR_CLAMP); + break; + case RepeatReflect: + OUT_RING (chan, 0x00024000 | + NV50TSC_1_0_WRAPS_MIRROR_REPEAT | + NV50TSC_1_0_WRAPT_MIRROR_REPEAT | + NV50TSC_1_0_WRAPR_MIRROR_REPEAT); + break; + case RepeatNormal: + default: + OUT_RING (chan, 0x00024000 | + NV50TSC_1_0_WRAPS_REPEAT | + NV50TSC_1_0_WRAPT_REPEAT | + NV50TSC_1_0_WRAPR_REPEAT); + break; + } + } else { + OUT_RING (chan, 0x00024000 | + NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER | + NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER | + NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER); + } + if (ppict->filter == PictFilterBilinear) { + OUT_RING (chan, + NV50TSC_1_1_MAGF_LINEAR | + NV50TSC_1_1_MINF_LINEAR | NV50TSC_1_1_MIPF_NONE); + } else { + OUT_RING (chan, + NV50TSC_1_1_MAGF_NEAREST | + NV50TSC_1_1_MINF_NEAREST | NV50TSC_1_1_MIPF_NONE); + } + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x00000000); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + + state->unit[unit].width = ppix->drawable.width; + state->unit[unit].height = ppix->drawable.height; + state->unit[unit].transform = ppict->transform; + return TRUE; +} + +static Bool +NVC0EXACheckBlend(int op) +{ + if (op > PictOpAdd) + NOUVEAU_FALLBACK("unsupported blend op %d\n", op); + return TRUE; +} + +static void +NVC0EXABlend(PixmapPtr ppix, PicturePtr ppict, int op, int component_alpha) +{ + NVC0EXA_LOCALS(ppix); + struct nvc0_blend_op *b = &NVC0EXABlendOp[op]; + unsigned sblend = b->src_blend; + unsigned dblend = b->dst_blend; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXABlend\n"); + + if (b->dst_alpha) { + if (!PICT_FORMAT_A(ppict->format)) { + if (sblend == BF(DST_ALPHA)) + sblend = BF(ONE); + else + if (sblend == BF(ONE_MINUS_DST_ALPHA)) + sblend = BF(ZERO); + } + } + + if (b->src_alpha && component_alpha) { + if (dblend == BF(SRC_ALPHA)) + dblend = BF(SRC_COLOR); + else + if (dblend == BF(ONE_MINUS_SRC_ALPHA)) + dblend = BF(ONE_MINUS_SRC_COLOR); + } + + if (sblend == BF(ONE) && dblend == BF(ZERO)) { + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_ENABLE(0), 1); + OUT_RING (chan, 0); + } else { + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_ENABLE(0), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_EQUATION_RGB, 5); + OUT_RING (chan, NVC0TCL_BLEND_EQUATION_RGB_FUNC_ADD); + OUT_RING (chan, sblend); + OUT_RING (chan, dblend); + OUT_RING (chan, NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_ADD); + OUT_RING (chan, sblend); + BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_FUNC_DST_ALPHA, 1); + OUT_RING (chan, dblend); + } +} + +Bool +NVC0EXACheckComposite(int op, + PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict) +{ + if (!NVC0EXACheckBlend(op)) + NOUVEAU_FALLBACK("blend not supported\n"); + + if (!NVC0EXACheckRenderTarget(pdpict)) + NOUVEAU_FALLBACK("render target invalid\n"); + + if (!NVC0EXACheckTexture(pspict, pdpict, op)) + NOUVEAU_FALLBACK("src picture invalid\n"); + + ErrorF("EXACheckComposite\n"); + + if (pmpict) { + if (pmpict->componentAlpha && + PICT_FORMAT_RGB(pmpict->format) && + NVC0EXABlendOp[op].src_alpha && + NVC0EXABlendOp[op].src_blend != BF(ZERO)) + NOUVEAU_FALLBACK("component-alpha not supported\n"); + + if (!NVC0EXACheckTexture(pmpict, pdpict, op)) + NOUVEAU_FALLBACK("mask picture invalid\n"); + } + + return TRUE; +} + +static void +NVC0EXAStateCompositeResubmit(struct nouveau_channel *chan) +{ + ScrnInfoPtr pScrn = chan->user_private; + NVPtr pNv = NVPTR(pScrn); + + NVC0EXAPrepareComposite(pNv->alu, pNv->pspict, pNv->pmpict, pNv->pdpict, + pNv->pspix, pNv->pmpix, pNv->pdpix); +} + +Bool +NVC0EXAPrepareComposite(int op, + PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict, + PixmapPtr pspix, PixmapPtr pmpix, PixmapPtr pdpix) +{ + NVC0EXA_LOCALS(pspix); + const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + if (MARK_RING (chan, 128, 4 + 2 + 2 * 10)) + NOUVEAU_FALLBACK("ring space\n"); + + // fonts: !pmpict, op == 12 (Add, ONE/ONE) + /* + if (pmpict || op != 12) + NOUVEAU_FALLBACK("comp-alpha"); + */ + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAPrepareComposite\n"); + + BEGIN_RING(chan, NvSub2D, NV50_2D_SERIALIZE, 1); + OUT_RING (chan, 0); + + if (!NVC0EXARenderTarget(pdpix, pdpict)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("render target invalid\n"); + } + + NVC0EXABlend(pdpix, pdpict, op, pmpict && pmpict->componentAlpha && + PICT_FORMAT_RGB(pmpict->format)); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_ADDRESS_HIGH, 2); + if (OUT_RELOCh(chan, pNv->tesla_scratch, CODE_OFFSET, shd_flags) || + OUT_RELOCl(chan, pNv->tesla_scratch, CODE_OFFSET, shd_flags)) { + MARK_UNDO(chan); + return FALSE; + } + + if (!NVC0EXATexture(pspix, pspict, 0)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("src picture invalid\n"); + } + BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1); + OUT_RING (chan, (0 << 9) | (0 << 1) | NVC0TCL_BIND_TIC_ACTIVE); + + if (pmpict) { + if (!NVC0EXATexture(pmpix, pmpict, 1)) { + MARK_UNDO(chan); + NOUVEAU_FALLBACK("mask picture invalid\n"); + } + state->have_mask = TRUE; + + BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1); + OUT_RING (chan, (1 << 9) | (1 << 1) | NVC0TCL_BIND_TIC_ACTIVE); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_START_ID(5), 1); + if (pdpict->format == PICT_a8) { + OUT_RING (chan, PFP_C_A8); + } else { + if (pmpict->componentAlpha && + PICT_FORMAT_RGB(pmpict->format)) { + if (NVC0EXABlendOp[op].src_alpha) + OUT_RING (chan, PFP_CCASA); + else + OUT_RING (chan, PFP_CCA); + } else { + OUT_RING (chan, PFP_C); + } + } + } else { + state->have_mask = FALSE; + + BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1); + OUT_RING (chan, (1 << 1) | 0); + + BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_START_ID(5), 1); + if (pdpict->format == PICT_a8) + OUT_RING (chan, PFP_S_A8); + else + OUT_RING (chan, PFP_S); + } + + BEGIN_RING(chan, NvSub3D, NVC0TCL_TSC_FLUSH, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_TIC_FLUSH, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, NvSub3D, NVC0TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 0); + + pNv->alu = op; + pNv->pspict = pspict; + pNv->pmpict = pmpict; + pNv->pdpict = pdpict; + pNv->pspix = pspix; + pNv->pmpix = pmpix; + pNv->pdpix = pdpix; + chan->flush_notify = NVC0EXAStateCompositeResubmit; + return TRUE; +} + +#define xFixedToFloat(v) \ + ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) + +static inline void +NVC0EXATransform(PictTransformPtr t, int x, int y, float sx, float sy, + float *x_ret, float *y_ret) +{ + if (t) { + PictVector v; + + v.vector[0] = IntToxFixed(x); + v.vector[1] = IntToxFixed(y); + v.vector[2] = xFixed1; + PictureTransformPoint(t, &v); + *x_ret = xFixedToFloat(v.vector[0]) / sx; + *y_ret = xFixedToFloat(v.vector[1]) / sy; + } else { + *x_ret = (float)x / sx; + *y_ret = (float)y / sy; + } +} + +void +NVC0EXAComposite(PixmapPtr pdpix, + int sx, int sy, int mx, int my, + int dx, int dy, int w, int h) +{ + NVC0EXA_LOCALS(pdpix); + float sX0, sX1, sX2, sY0, sY1, sY2; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAComposite\n"); + + WAIT_RING (chan, 64); + BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_HORIZ(0), 2); + OUT_RING (chan, ((dx + w) << 16) | dx); + OUT_RING (chan, ((dy + h) << 16) | dy); + BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES); + + NVC0EXATransform(state->unit[0].transform, sx, sy + (h * 2), + state->unit[0].width, state->unit[0].height, + &sX0, &sY0); + NVC0EXATransform(state->unit[0].transform, sx, sy, + state->unit[0].width, state->unit[0].height, + &sX1, &sY1); + NVC0EXATransform(state->unit[0].transform, sx + (w * 2), sy, + state->unit[0].width, state->unit[0].height, + &sX2, &sY2); + + if (state->have_mask) { + float mX0, mX1, mX2, mY0, mY1, mY2; + + NVC0EXATransform(state->unit[1].transform, mx, my + (h * 2), + state->unit[1].width, state->unit[1].height, + &mX0, &mY0); + NVC0EXATransform(state->unit[1].transform, mx, my, + state->unit[1].width, state->unit[1].height, + &mX1, &mY1); + NVC0EXATransform(state->unit[1].transform, mx + (w * 2), my, + state->unit[1].width, state->unit[1].height, + &mX2, &mY2); + + VTX2s(pNv, sX0, sY0, mX0, mY0, dx, dy + (h * 2)); + VTX2s(pNv, sX1, sY1, mX1, mY1, dx, dy); + VTX2s(pNv, sX2, sY2, mX2, mY2, dx + (w * 2), dy); + } else { + VTX1s(pNv, sX0, sY0, dx, dy + (h * 2)); + VTX1s(pNv, sX1, sY1, dx, dy); + VTX1s(pNv, sX2, sY2, dx + (w * 2), dy); + } + + BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_END, 1); + OUT_RING (chan, 0); +} + +void +NVC0EXADoneComposite(PixmapPtr pdpix) +{ + NVC0EXA_LOCALS(pdpix); + + chan->flush_notify = NULL; +} + |