diff options
author | Ben Skeggs <bskeggs@redhat.com> | 2009-02-20 15:26:24 +1000 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2009-02-20 15:26:24 +1000 |
commit | 8616e8499bc16ebd8e49efe022929d4519bdc44b (patch) | |
tree | 6eafa8357f4a6b2b44f6b5e3dc122a684243660b | |
parent | 3c649c9329eeb0df557d0b7675c5dfb6969bf716 (diff) |
nv50: rework vm handling so we can support >512MiB VRAM
-rw-r--r-- | linux-core/nouveau_bo.c | 17 | ||||
-rw-r--r-- | linux-core/nouveau_gem.c | 37 | ||||
-rw-r--r-- | shared-core/nouveau_drv.h | 10 | ||||
-rw-r--r-- | shared-core/nouveau_mem.c | 251 | ||||
-rw-r--r-- | shared-core/nouveau_object.c | 53 | ||||
-rw-r--r-- | shared-core/nouveau_state.c | 2 |
6 files changed, 234 insertions(+), 136 deletions(-)
diff --git a/linux-core/nouveau_bo.c b/linux-core/nouveau_bo.c index e084a4d3..1d4d1be4 100644 --- a/linux-core/nouveau_bo.c +++ b/linux-core/nouveau_bo.c @@ -296,6 +296,7 @@ nouveau_bo_move(struct drm_buffer_object *bo, int evict, int no_wait, { struct drm_nouveau_private *dev_priv = bo->dev->dev_private; struct drm_bo_mem_reg *old_mem = &bo->mem; + int ret; if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) return drm_bo_move_memcpy(bo, evict, no_wait, new_mem); @@ -304,9 +305,7 @@ nouveau_bo_move(struct drm_buffer_object *bo, int evict, int no_wait, (new_mem->mem_type == DRM_BO_MEM_VRAM || new_mem->mem_type == DRM_BO_MEM_PRIV0) && !(new_mem->proposed_flags & DRM_NOUVEAU_BO_FLAG_NOVM)) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt; - unsigned offset = new_mem->mm_node->start << PAGE_SHIFT; - unsigned count = new_mem->size / 65536; + uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT; unsigned tile = 0; if (new_mem->proposed_flags & DRM_NOUVEAU_BO_FLAG_TILE) { @@ -316,13 +315,11 @@ nouveau_bo_move(struct drm_buffer_object *bo, int evict, int no_wait, tile = 0x00007000; } - while (count--) { - unsigned pte = offset / 65536; - - INSTANCE_WR(pt, (pte * 2) + 0, offset | 1); - INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000 | tile); - offset += 65536; - } + ret = nv50_mem_vm_bind_linear(bo->dev, + offset + dev_priv->vm_vram_base, + new_mem->size, tile, offset); + if (ret) + return ret; } if (old_mem->flags & DRM_BO_FLAG_CLEAN) { diff --git a/linux-core/nouveau_gem.c b/linux-core/nouveau_gem.c index bb599990..d290f26e 100644 --- a/linux-core/nouveau_gem.c +++ b/linux-core/nouveau_gem.c @@ -662,12 +662,14 @@ nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data, int nouveau_gem_ioctl_tile(struct drm_device *dev, void *data, - struct drm_file *file_priv) + struct drm_file *file_priv) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_gem_tile *req = data; struct nouveau_gem_object *ngem; struct drm_gem_object *gem; + unsigned 
offset, tile = 0; + int ret; NOUVEAU_CHECK_INITIALISED_WITH_RETURN; NOUVEAU_CHECK_MM_ENABLED_WITH_RETURN; @@ -677,30 +679,21 @@ nouveau_gem_ioctl_tile(struct drm_device *dev, void *data, return -EINVAL; ngem = gem->driver_private; - { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt; - unsigned offset = ngem->bo->offset + req->delta; - unsigned count = req->size / 65536; - unsigned tile = 0; - - offset -= dev_priv->vm_vram_base; - - if (req->flags & NOUVEAU_MEM_TILE) { - if (req->flags & NOUVEAU_MEM_TILE_ZETA) - tile = 0x00002800; - else - tile = 0x00007000; - } - - while (count--) { - unsigned pte = offset / 65536; + offset = ngem->bo->offset + req->delta; + offset -= dev_priv->vm_vram_base; - INSTANCE_WR(pt, (pte * 2) + 0, offset | 1); - INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000 | tile); - offset += 65536; - } + if (req->flags & NOUVEAU_MEM_TILE) { + if (req->flags & NOUVEAU_MEM_TILE_ZETA) + tile = 0x00002800; + else + tile = 0x00007000; } + ret = nv50_mem_vm_bind_linear(dev, ngem->bo->offset + req->delta, + req->size, tile, offset); + if (ret) + return ret; + mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(gem); mutex_unlock(&dev->struct_mutex); diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h index 39a59320..7a4cf582 100644 --- a/shared-core/nouveau_drv.h +++ b/shared-core/nouveau_drv.h @@ -159,7 +159,7 @@ struct nouveau_channel /* NV50 VM */ struct nouveau_gpuobj *vm_pd; struct nouveau_gpuobj_ref *vm_gart_pt; - struct nouveau_gpuobj_ref *vm_vram_pt; + struct nouveau_gpuobj_ref **vm_vram_pt; /* Objects */ struct nouveau_gpuobj_ref *ramin; /* Private instmem */ @@ -320,7 +320,8 @@ struct drm_nouveau_private { uint64_t vm_vram_base; uint64_t vm_vram_size; uint64_t vm_end; - struct nouveau_gpuobj *vm_vram_pt; + struct nouveau_gpuobj **vm_vram_pt; + int vm_vram_pt_nr; /* the mtrr covering the FB */ int fb_mtrr; @@ -441,6 +442,11 @@ extern void nouveau_mem_free(struct drm_device *dev, struct mem_block*); extern int 
nouveau_mem_init(struct drm_device *); extern int nouveau_mem_init_ttm(struct drm_device *); extern void nouveau_mem_close(struct drm_device *); +extern int nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt, + uint32_t size, uint32_t flags, + uint64_t phys); +extern void nv50_mem_vm_unbind(struct drm_device *, uint64_t virt, + uint32_t size); /* nouveau_notifier.c */ extern int nouveau_notifier_init_channel(struct nouveau_channel *); diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c index 43eca753..5c31224b 100644 --- a/shared-core/nouveau_mem.c +++ b/shared-core/nouveau_mem.c @@ -202,6 +202,144 @@ void nouveau_mem_release(struct drm_file *file_priv, struct mem_block *heap) } /* + * NV50 VM helpers + */ +#define VMBLOCK (512*1024*1024) +static int +nv50_mem_vm_preinit(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + dev_priv->vm_gart_base = roundup(0, VMBLOCK); + dev_priv->vm_gart_size = VMBLOCK; + + dev_priv->vm_vram_base = dev_priv->vm_gart_base + dev_priv->vm_gart_size; + dev_priv->vm_vram_size = roundup(nouveau_mem_fb_amount(dev), VMBLOCK); + dev_priv->vm_end = dev_priv->vm_vram_base + dev_priv->vm_vram_size; + + DRM_DEBUG("NV50VM: GART 0x%016llx-0x%016llx\n", + dev_priv->vm_gart_base, + dev_priv->vm_gart_base + dev_priv->vm_gart_size - 1); + DRM_DEBUG("NV50VM: VRAM 0x%016llx-0x%016llx\n", + dev_priv->vm_vram_base, + dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1); + return 0; +} + +static void +nv50_mem_vm_takedown(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + int i; + + if (!dev_priv->vm_vram_pt) + return; + + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) { + if (!dev_priv->vm_vram_pt[i]) + break; + + nouveau_gpuobj_del(dev, &dev_priv->vm_vram_pt[i]); + } + + drm_free(dev_priv->vm_vram_pt, + dev_priv->vm_vram_pt_nr * sizeof(struct nouveau_gpuobj *), + DRM_MEM_DRIVER); + dev_priv->vm_vram_pt = NULL; + dev_priv->vm_vram_pt_nr = 0; +} + +static int 
+nv50_mem_vm_init(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + const int nr = dev_priv->vm_vram_size / VMBLOCK; + int i, ret; + + dev_priv->vm_vram_pt_nr = nr; + dev_priv->vm_vram_pt = drm_calloc(nr, sizeof(struct nouveau_gpuobj *), + DRM_MEM_DRIVER); + if (!dev_priv->vm_vram_pt) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + ret = nouveau_gpuobj_new(dev, NULL, VMBLOCK/65536*8, 0, + NVOBJ_FLAG_ZERO_ALLOC | + NVOBJ_FLAG_ALLOW_NO_REFS, + &dev_priv->vm_vram_pt[i]); + if (ret) { + DRM_ERROR("Error creating VRAM page tables: %d\n", ret); + return ret; + } + } + + return 0; +} + +int +nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, + uint32_t flags, uint64_t phys) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpuobj **pgt; + unsigned psz, pfl; + + if (virt >= dev_priv->vm_gart_base && + (virt + size) < (dev_priv->vm_gart_base + dev_priv->vm_gart_size)) { + psz = 4096; + pgt = &dev_priv->gart_info.sg_ctxdma; + pfl = 0x21; + virt -= dev_priv->vm_gart_base; + } else + if (virt >= dev_priv->vm_vram_base && + (virt + size) < (dev_priv->vm_vram_base + dev_priv->vm_vram_size)) { + psz = 65536; + pgt = dev_priv->vm_vram_pt; + pfl = 0x01; + virt -= dev_priv->vm_vram_base; + } else { + DRM_ERROR("Invalid address: 0x%16llx-0x%16llx\n", + virt, virt + size - 1); + return -EINVAL; + } + + size &= ~(psz - 1); + + if (flags & 0x80000000) { + while (size) { + struct nouveau_gpuobj *pt = pgt[virt / (512*1024*1024)]; + int pte = ((virt % (512*1024*1024)) / psz) * 2; + + INSTANCE_WR(pt, pte++, 0x00000000); + INSTANCE_WR(pt, pte++, 0x00000000); + + size -= psz; + virt += psz; + } + } else { + while (size) { + struct nouveau_gpuobj *pt = pgt[virt / (512*1024*1024)]; + int pte = ((virt % (512*1024*1024)) / psz) * 2; + + INSTANCE_WR(pt, pte++, phys | pfl); + INSTANCE_WR(pt, pte++, flags); + + size -= psz; + phys += psz; + virt += psz; + } + } + + return 0; +} + +void 
+nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size) +{ + nv50_mem_vm_bind_linear(dev, virt, size, 0x80000000, 0); +} + +/* * Cleanup everything */ void nouveau_mem_takedown(struct mem_block **heap) @@ -225,6 +363,9 @@ void nouveau_mem_close(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; + if (dev_priv->card_type >= NV_50) + nv50_mem_vm_takedown(dev); + if (!dev_priv->mm_enabled) { nouveau_mem_takedown(&dev_priv->agp_heap); nouveau_mem_takedown(&dev_priv->fb_heap); @@ -405,21 +546,6 @@ nouveau_mem_init_agp(struct drm_device *dev, int ttm) return 0; } - -static int -nv50_mem_vm_preinit(struct drm_device *dev) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - - dev_priv->vm_gart_base = 0; - dev_priv->vm_gart_size = 512 * 1024 * 1024; - dev_priv->vm_vram_base = 512 * 1024 * 1024; - dev_priv->vm_vram_size = 512 * 1024 * 1024; - dev_priv->vm_end = 1024ULL * 1024 * 1024; - - return 0; -} - int nouveau_mem_init_ttm(struct drm_device *dev) { @@ -484,16 +610,10 @@ nouveau_mem_init_ttm(struct drm_device *dev) drm_get_resource_len(dev, 1), DRM_MTRR_WC); - /* G8x: Allocate shared page table to map real VRAM pages into */ if (dev_priv->card_type >= NV_50) { - unsigned size = ((512 * 1024 * 1024) / 65536) * 8; - - ret = nouveau_gpuobj_new(dev, NULL, size, 0, - NVOBJ_FLAG_ZERO_ALLOC | - NVOBJ_FLAG_ALLOW_NO_REFS, - &dev_priv->vm_vram_pt); + ret = nv50_mem_vm_init(dev); if (ret) { - DRM_ERROR("Error creating VRAM page table: %d\n", ret); + DRM_ERROR("Error creating VM page tables: %d\n", ret); return ret; } } @@ -605,21 +725,14 @@ int nouveau_mem_init(struct drm_device *dev) } } - /* G8x: Allocate shared page table to map real VRAM pages into */ if (dev_priv->card_type >= NV_50) { - unsigned size = ((512 * 1024 * 1024) / 65536) * 8; - - ret = nouveau_gpuobj_new(dev, NULL, size, 0, - NVOBJ_FLAG_ZERO_ALLOC | - NVOBJ_FLAG_ALLOW_NO_REFS, - &dev_priv->vm_vram_pt); + ret = nv50_mem_vm_init(dev); if (ret) { - 
DRM_ERROR("Error creating VRAM page table: %d\n", ret); + DRM_ERROR("Error creating VM page tables: %d\n", ret); return ret; } } - return 0; } @@ -755,17 +868,9 @@ alloc_ok: /* On G8x, map memory into VM */ if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 && !(flags & NOUVEAU_MEM_NOVM)) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt; unsigned offset = block->start - dev_priv->vm_vram_base; - unsigned count = block->size / 65536; unsigned tile = 0; - if (!pt) { - DRM_ERROR("vm alloc without vm pt\n"); - nouveau_mem_free_block(block); - return NULL; - } - /* The tiling stuff is *not* what NVIDIA does - but both the * 2D and 3D engines seem happy with this simpler method. * Should look into why NVIDIA do what they do at some point. @@ -777,12 +882,12 @@ alloc_ok: tile = 0x00007000; } - while (count--) { - unsigned pte = offset / 65536; - - INSTANCE_WR(pt, (pte * 2) + 0, offset | 1); - INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000 | tile); - offset += 65536; + ret = nv50_mem_vm_bind_linear(dev, block->start, block->size, + tile, offset); + if (ret) { + DRM_ERROR("error binding into vm: %d\n", ret); + nouveau_mem_free_block(block); + return NULL; } } else if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50) { @@ -860,24 +965,9 @@ void nouveau_mem_free(struct drm_device* dev, struct mem_block* block) /* G8x: Remove pages from vm */ if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 && !(block->flags & NOUVEAU_MEM_NOVM)) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt; - unsigned offset = block->start - dev_priv->vm_vram_base; - unsigned count = block->size / 65536; - - if (!pt) { - DRM_ERROR("vm free without vm pt\n"); - goto out_free; - } - - while (count--) { - unsigned pte = offset / 65536; - INSTANCE_WR(pt, (pte * 2) + 0, 0); - INSTANCE_WR(pt, (pte * 2) + 1, 0); - offset += 65536; - } + nv50_mem_vm_unbind(dev, block->start, block->size); } -out_free: nouveau_mem_free_block(block); } @@ -943,6 +1033,8 @@ 
nouveau_ioctl_mem_tile(struct drm_device *dev, void *data, struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_mem_tile *memtile = data; struct mem_block *block = NULL; + unsigned offset, tile = 0; + int ret; NOUVEAU_CHECK_INITIALISED_WITH_RETURN; NOUVEAU_CHECK_MM_DISABLED_WITH_RETURN; @@ -953,36 +1045,27 @@ nouveau_ioctl_mem_tile(struct drm_device *dev, void *data, if (memtile->flags & NOUVEAU_MEM_FB) block = find_block(dev_priv->fb_heap, memtile->offset); - if (!block) + if (!block || (memtile->delta + memtile->size > block->size)) return -EINVAL; if (block->file_priv != file_priv) return -EPERM; - { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt; - unsigned offset = block->start + memtile->delta; - unsigned count = memtile->size / 65536; - unsigned tile = 0; - - offset -= dev_priv->vm_vram_base; + offset = block->start + memtile->delta; + offset -= dev_priv->vm_vram_base; - if (memtile->flags & NOUVEAU_MEM_TILE) { - if (memtile->flags & NOUVEAU_MEM_TILE_ZETA) - tile = 0x00002800; - else - tile = 0x00007000; - } - - while (count--) { - unsigned pte = offset / 65536; - - INSTANCE_WR(pt, (pte * 2) + 0, offset | 1); - INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000 | tile); - offset += 65536; - } + if (memtile->flags & NOUVEAU_MEM_TILE) { + if (memtile->flags & NOUVEAU_MEM_TILE_ZETA) + tile = 0x00002800; + else + tile = 0x00007000; } + ret = nv50_mem_vm_bind_linear(dev, block->start + memtile->delta, + memtile->size, tile, offset); + if (ret) + return ret; + return 0; } diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c index 74fc3d10..10e715d6 100644 --- a/shared-core/nouveau_object.c +++ b/shared-core/nouveau_object.c @@ -992,11 +992,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, /* NV50 VM * - Allocate per-channel page-directory - * - Point offset 0-512MiB at shared PCIEGART table - * - Point offset 512-1024MiB at shared VRAM table + * - Map GART and VRAM into the channel's address space at the + * 
locations determined during init. */ if (dev_priv->card_type >= NV_50) { - uint32_t vm_offset; + uint32_t vm_offset, pde; vm_offset = (dev_priv->chipset & 0xf0) == 0x50 ? 0x1400 : 0x200; vm_offset += chan->ramin->gpuobj->im_pramin->start; @@ -1008,21 +1008,35 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, INSTANCE_WR(chan->vm_pd, (i+4)/4, 0xdeadcafe); } - if ((ret = nouveau_gpuobj_ref_add(dev, NULL, 0, - dev_priv->gart_info.sg_ctxdma, - &chan->vm_gart_pt))) + pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 2; + ret = nouveau_gpuobj_ref_add(dev, NULL, 0, + dev_priv->gart_info.sg_ctxdma, + &chan->vm_gart_pt); + if (ret) return ret; - INSTANCE_WR(chan->vm_pd, (0+0)/4, + INSTANCE_WR(chan->vm_pd, pde++, chan->vm_gart_pt->instance | 0x03); - INSTANCE_WR(chan->vm_pd, (0+4)/4, 0x00000000); + INSTANCE_WR(chan->vm_pd, pde++, 0x00000000); - if ((ret = nouveau_gpuobj_ref_add(dev, NULL, 0, - dev_priv->vm_vram_pt, - &chan->vm_vram_pt))) - return ret; - INSTANCE_WR(chan->vm_pd, (8+0)/4, - chan->vm_vram_pt->instance | 0x61); - INSTANCE_WR(chan->vm_pd, (8+4)/4, 0x00000000); + chan->vm_vram_pt = + drm_calloc(dev_priv->vm_vram_pt_nr, + sizeof(struct nouveau_gpuobj_ref *), + DRM_MEM_DRIVER); + if (!chan->vm_vram_pt) + return -ENOMEM; + + pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 2; + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) { + ret = nouveau_gpuobj_ref_add(dev, NULL, 0, + dev_priv->vm_vram_pt[i], + &chan->vm_vram_pt[i]); + if (ret) + return ret; + + INSTANCE_WR(chan->vm_pd, pde++, + chan->vm_vram_pt[i]->instance | 0x61); + INSTANCE_WR(chan->vm_pd, pde++, 0x00000000); + } } /* RAMHT */ @@ -1100,9 +1114,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, void nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan) { + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; struct drm_device *dev = chan->dev; struct list_head *entry, *tmp; struct nouveau_gpuobj_ref *ref; + int i; DRM_DEBUG("ch%d\n", chan->id); @@ -1116,7 +1132,12 @@ 
nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan) nouveau_gpuobj_del(dev, &chan->vm_pd); nouveau_gpuobj_ref_del(dev, &chan->vm_gart_pt); - nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt); + if (chan->vm_vram_pt) { + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) + nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]); + drm_free(chan->vm_vram_pt, dev_priv->vm_vram_pt_nr * + sizeof(struct nouveau_gpuobj_ref *), DRM_MEM_DRIVER); + } if (chan->ramin_heap) nouveau_mem_takedown(&chan->ramin_heap); diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c index f39a041d..22ab6883 100644 --- a/shared-core/nouveau_state.c +++ b/shared-core/nouveau_state.c @@ -369,8 +369,6 @@ static void nouveau_card_takedown(struct drm_device *dev) nouveau_sgdma_takedown(dev); nouveau_gpuobj_takedown(dev); - nouveau_gpuobj_del(dev, &dev_priv->vm_vram_pt); - nouveau_mem_close(dev); engine->instmem.takedown(dev); |