From 98409fc88e1e12c2de73223738a0b974a9c1cae3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Thu, 8 Dec 2011 22:25:02 +1000
Subject: nva3: initial support for using the copy engine for copies

Disabled by default, xorg.conf option to enable.

This is for testing only, and will probably not be used for normal EXA
copy ever. The code will be useful now to test that PCOPY works, and
later on for the NVIDIA->Intel framebuffer copies.

Signed-off-by: Dave Airlie
---
 src/nouveau_local.h   |   5 +++
 src/nv50_exa.c        | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nv_accel_common.c |  34 +++++++++++++++
 src/nv_const.h        |   2 +
 src/nv_dma.c          |  13 ++++++
 src/nv_dma.h          |   1 +
 src/nv_driver.c       |   3 ++
 src/nv_type.h         |   4 ++
 8 files changed, 175 insertions(+)

diff --git a/src/nouveau_local.h b/src/nouveau_local.h
index cc2fe34..31f06e7 100644
--- a/src/nouveau_local.h
+++ b/src/nouveau_local.h
@@ -171,4 +171,9 @@ BEGIN_1IC0(struct nouveau_channel *chan, int subc, int mthd, int size)
 #define NV50_GRAPH(subc, mthd) SUBC_##subc((NV50_GRAPH_##mthd))
 #define NVC0_GRAPH(subc, mthd) SUBC_##subc((NVC0_GRAPH_##mthd))
 
+/* CE channel (nva3-) */
+#define SUBC_M2MF(mthd) 0, (mthd)
+#define SUBC_NVSW(mthd) 1, (mthd)
+#define SUBC_COPY(mthd) 6, (mthd)
+
 #endif
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index 6529e74..164a4fc 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -273,6 +273,107 @@ NV50EXADoneSolid(PixmapPtr pdpix)
 	chan->flush_notify = NULL;
 }
 
+static Bool
+NV50EXAPrepareCopyAsync(PixmapPtr pspix, PixmapPtr pdpix,
+			int dx, int dy, int alu, Pixel planemask)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pspix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_bo *src = nouveau_pixmap_bo(pspix);
+	struct nouveau_bo *dst = nouveau_pixmap_bo(pdpix);
+	struct nouveau_grobj *copy0 = pNv->NvCopy0;
+	struct nouveau_channel *chan = copy0->channel;
+	unsigned cpp = pspix->drawable.bitsPerPixel / 8;
+
+	planemask |= ~0 << pspix->drawable.bitsPerPixel;
+	if (alu != GXcopy || planemask != ~0)
+		NOUVEAU_FALLBACK("not solid copy\n");
+
+	/*XXX: work around libdrm sucking, will be fixed in rework */
+	FIRE_RING(pNv->chan);
+
+	if (src->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) {
+		BEGIN_NV04(chan, SUBC_COPY(0x0200), 5);
+		OUT_RING  (chan, src->tile_mode << 4);
+		OUT_RING  (chan, pspix->drawable.width * cpp);
+		OUT_RING  (chan, pspix->drawable.height);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	} else {
+		BEGIN_NV04(chan, SUBC_COPY(0x031c), 1);
+		OUT_RING  (chan, exaGetPixmapPitch(pspix));
+	}
+
+	if (dst->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) {
+		BEGIN_NV04(chan, SUBC_COPY(0x0220), 5);
+		OUT_RING  (chan, dst->tile_mode << 4);
+		OUT_RING  (chan, pdpix->drawable.width * cpp);
+		OUT_RING  (chan, pdpix->drawable.height);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	} else {
+		BEGIN_NV04(chan, SUBC_COPY(0x0320), 1);
+		OUT_RING  (chan, exaGetPixmapPitch(pdpix));
+	}
+
+	pNv->copy_async = TRUE;
+	pNv->pspix = pspix;
+	pNv->pdpix = pdpix;
+	return TRUE;
+}
+
+static void
+NV50EXACopyAsync(PixmapPtr pdpix, int srcX , int srcY,
+		 int dstX , int dstY, int width, int height)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pdpix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	PixmapPtr pspix = pNv->pspix;
+	struct nouveau_bo *src = nouveau_pixmap_bo(pspix);
+	struct nouveau_bo *dst = nouveau_pixmap_bo(pdpix);
+	struct nouveau_grobj *copy0 = pNv->NvCopy0;
+	struct nouveau_channel *chan = copy0->channel;
+	unsigned cpp = pspix->drawable.bitsPerPixel / 8;
+	uint32_t src_dom = NOUVEAU_BO_VRAM;
+	uint32_t dst_dom = NOUVEAU_BO_VRAM;
+	uint32_t src_off = 0, dst_off = 0;
+	uint32_t exec = 0;
+
+	if (!MARK_RING(chan, 64, 4)) {
+		if (src->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) {
+			BEGIN_NV04(chan, SUBC_COPY(0x0214), 2);
+			OUT_RING  (chan, srcX * cpp);
+			OUT_RING  (chan, srcY);
+		} else {
+			src_off = srcY * exaGetPixmapPitch(pspix) + srcX * cpp;
+			exec |= 0x00000010;
+		}
+
+		if (dst->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) {
+			BEGIN_NV04(chan, SUBC_COPY(0x0234), 2);
+			OUT_RING  (chan, dstX * cpp);
+			OUT_RING  (chan, dstY);
+		} else {
+			dst_off = dstY * exaGetPixmapPitch(pdpix) + dstX * cpp;
+			exec |= 0x00000100;
+		}
+
+		BEGIN_NV04(chan, SUBC_COPY(0x030c), 4);
+		OUT_RELOCh(chan, src, src_off, src_dom | NOUVEAU_BO_RD);
+		OUT_RELOCl(chan, src, src_off, src_dom | NOUVEAU_BO_RD);
+		OUT_RELOCh(chan, dst, dst_off, dst_dom | NOUVEAU_BO_WR);
+		OUT_RELOCl(chan, dst, dst_off, dst_dom | NOUVEAU_BO_WR);
+		BEGIN_NV04(chan, SUBC_COPY(0x0324), 2);
+		OUT_RING  (chan, width * cpp);
+		OUT_RING  (chan, height);
+		BEGIN_NV04(chan, SUBC_COPY(0x0300), 1);
+		OUT_RING  (chan, exec);
+
+		/*XXX: work around libdrm sucking, will be fixed in rework */
+		FIRE_RING (chan);
+	}
+}
+
 static void
 NV50EXAStateCopyResubmit(struct nouveau_channel *chan)
 {
@@ -289,6 +390,12 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
 {
 	NV50EXA_LOCALS(pdpix);
 
+	if (pNv->NvCopy0) {
+		if (NV50EXAPrepareCopyAsync(pspix, pdpix, dx, dy,
+					    alu, planemask))
+			return TRUE;
+	}
+
 	if (MARK_RING(chan, 64, 4))
 		NOUVEAU_FALLBACK("ring space\n");
 
@@ -304,6 +411,7 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
 
 	NV50EXASetROP(pdpix, alu, planemask);
 
+	pNv->copy_async = FALSE;
 	pNv->pspix = pspix;
 	pNv->pdpix = pdpix;
 	pNv->alu = alu;
@@ -319,6 +427,11 @@ NV50EXACopy(PixmapPtr pdpix, int srcX , int srcY,
 {
 	NV50EXA_LOCALS(pdpix);
 
+	if (pNv->copy_async) {
+		NV50EXACopyAsync(pdpix, srcX, srcY, dstX, dstY, width, height);
+		return;
+	}
+
 	WAIT_RING (chan, 17);
 	BEGIN_NV04(chan, SUBC_2D(0x0110), 1);
 	OUT_RING  (chan, 0);
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index 3219dbe..80b6214 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -604,6 +604,35 @@ NVAccelInit2D_NV50(ScrnInfoPtr pScrn)
 	return TRUE;
 }
 
+static Bool
+NVAccelInitCOPY0(ScrnInfoPtr pScrn)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_channel *chan = pNv->ce_chan ? pNv->ce_chan : pNv->chan;
+	uint32_t class;
+
+	if (pNv->Architecture == NV_ARCH_50)
+		class = 0x85b5;
+	else
+		class = 0x90b5;
+
+	if (!pNv->NvCopy0) {
+		if (nouveau_grobj_alloc(chan, NvCopy0, class, &pNv->NvCopy0))
+			return FALSE;
+	}
+
+	if (pNv->Architecture == NV_ARCH_50) {
+		BEGIN_NV04(chan, NV01_SUBC(COPY, OBJECT), 1);
+		OUT_RING  (chan, pNv->NvCopy0->handle);
+		BEGIN_NV04(chan, SUBC_COPY(0x0180), 3);
+		OUT_RING  (chan, chan->vram->handle);
+		OUT_RING  (chan, chan->vram->handle);
+		OUT_RING  (chan, chan->vram->handle);
+	}
+
+	return TRUE;
+}
+
 #define INIT_CONTEXT_OBJECT(name) do { \
 	ret = NVAccelInit##name(pScrn); \
 	if (!ret) { \
@@ -673,6 +702,10 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
 		break;
 	}
 
+	/* Copy Engine */
+	if (pNv->dev->chipset >= 0xa3)
+		INIT_CONTEXT_OBJECT(COPY0);
+
 	return TRUE;
 }
 
@@ -700,6 +733,7 @@ void NVAccelFree(ScrnInfoPtr pScrn)
 	nouveau_grobj_free(&pNv->NvMemFormat);
 	nouveau_grobj_free(&pNv->NvSW);
 	nouveau_grobj_free(&pNv->Nv3D);
+	nouveau_grobj_free(&pNv->NvCopy0);
 
 	nouveau_bo_ref(NULL, &pNv->tesla_scratch);
 	nouveau_bo_ref(NULL, &pNv->shader_mem);
diff --git a/src/nv_const.h b/src/nv_const.h
index a27a951..9414b75 100644
--- a/src/nv_const.h
+++ b/src/nv_const.h
@@ -15,6 +15,7 @@ typedef enum {
     OPTION_GLX_VBLANK,
     OPTION_ZAPHOD_HEADS,
     OPTION_PAGE_FLIP,
+    OPTION_ASYNC_COPY,
 } NVOpts;
 
 
@@ -28,6 +29,7 @@ static const OptionInfoRec NVOptions[] = {
     { OPTION_GLX_VBLANK, "GLXVBlank", OPTV_BOOLEAN, {0}, FALSE },
     { OPTION_ZAPHOD_HEADS, "ZaphodHeads", OPTV_STRING, {0}, FALSE },
     { OPTION_PAGE_FLIP, "PageFlip", OPTV_BOOLEAN, {0}, FALSE },
+    { OPTION_ASYNC_COPY, "AsyncCopy", OPTV_BOOLEAN, {0}, FALSE },
     { -1, NULL, OPTV_NONE, {0}, FALSE }
 };
 
diff --git a/src/nv_dma.c b/src/nv_dma.c
index 49ed40a..f3b3807 100644
--- a/src/nv_dma.c
+++ b/src/nv_dma.c
@@ -64,6 +64,19 @@ NVInitDma(ScrnInfoPtr pScrn)
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		   "Opened GPU channel %d\n", pNv->chan->id);
 
+	if (pNv->async_copy_enabled) {
+		ret = nouveau_channel_alloc(pNv->dev, NvDmaFB, NvDmaTT, 24*1024,
+					    &pNv->ce_chan);
+		if (ret) {
+			xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
+				   "Error creating CE GPU channel %d\n", ret);
+		} else {
+			xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+				   "Opened CE GPU channel %d\n",
+				   pNv->ce_chan->id);
+		}
+	}
+
 	return TRUE;
 }
 
diff --git a/src/nv_dma.h b/src/nv_dma.h
index d21ff39..7e3da38 100644
--- a/src/nv_dma.h
+++ b/src/nv_dma.h
@@ -20,6 +20,7 @@ enum DMAObjects {
 	NvContextBeta4 = 0x8000001C,
 	Nv2D = 0x80000020,
 	NvSW = 0x80000021,
+	NvCopy0 = 0x80000022,
 	NvDmaFB = 0xD8000001,
 	NvDmaTT = 0xD8000002,
 	NvDmaNotifier0 = 0xD8000003,
diff --git a/src/nv_driver.c b/src/nv_driver.c
index 87ef2c4..d41d34d 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -805,6 +805,9 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 		if (pNv->Architecture >= NV_ARCH_50)
 			pNv->wfb_enabled = xf86ReturnOptValBool(
 				pNv->Options, OPTION_WFB, FALSE);
+		if (pNv->dev->chipset >= 0xa3)
+			pNv->async_copy_enabled = xf86ReturnOptValBool(
+				pNv->Options, OPTION_ASYNC_COPY, FALSE);
 
 		pNv->tiled_scanout = TRUE;
 	}
diff --git a/src/nv_type.h b/src/nv_type.h
index 4204556..0362bb0 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -53,6 +53,7 @@ typedef struct _NVRec {
     Bool tiled_scanout;
     Bool glx_vblank;
     Bool has_pageflip;
+    Bool async_copy_enabled;
     ScreenBlockHandlerProcPtr BlockHandler;
     CreateScreenResourcesProcPtr CreateScreenResources;
     CloseScreenProcPtr CloseScreen;
@@ -79,6 +80,7 @@ typedef struct _NVRec {
 
 	/* GPU context */
 	struct nouveau_channel *chan;
+	struct nouveau_channel *ce_chan;
 	struct nouveau_notifier *notify0;
 	struct nouveau_notifier *vblank_sem;
 	struct nouveau_grobj *NvContextSurfaces;
@@ -95,6 +97,7 @@ typedef struct _NVRec {
 	struct nouveau_grobj *Nv2D;
 	struct nouveau_grobj *Nv3D;
 	struct nouveau_grobj *NvSW;
+	struct nouveau_grobj *NvCopy0;
 	struct nouveau_bo *tesla_scratch;
 	struct nouveau_bo *shader_mem;
 	struct nouveau_bo *xv_filtertable_mem;
@@ -108,6 +111,7 @@ typedef struct _NVRec {
 	unsigned point_x, point_y;
 	unsigned width_in, width_out;
 	unsigned height_in, height_out;
+	Bool copy_async;
 } NVRec;
 
 #define NVPTR(p) ((NVPtr)((p)->driverPrivate))
-- 
cgit v1.2.3