Merge tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie: "This seems to have been a comparatively quieter merge window, I assume due to holidays etc. The "biggest" change is AMD header cleanups, which merge/remove a bunch of them. The AMD gpu scheduler is now being made generic with the etnaviv driver wanting to reuse the code, hopefully other drivers can go in the same direction. Otherwise it's the usual lots of stuff in i915/amdgpu, not so much stuff elsewhere. Core: - Add .last_close and .output_poll_changed helpers to reduce driver footprints - Fix plane clipping - Improved debug printing support - Add panel orientation property - Update edid derived properties at edid setting - Reduction in fbdev driver footprint - Move amdgpu scheduler into core for other drivers to use. i915: - Selftest and IGT improvements - Fast boot prep work on IPS, pipe config - HW workarounds for Cannonlake, Geminilake - Cannonlake clock and HDMI2.0 fixes - GPU cache invalidation and context switch improvements - Display planes cleanup - New PMU interface for perf queries - New firmware support for KBL/SKL - Geminilake HW workaround for perforamce - Coffeelake stolen memory improvements - GPU reset robustness work - Cannonlake horizontal plane flipping - GVT work amdgpu/radeon: - RV and Vega header file cleanups (lots of lines gone!) - TTM operation context support - 48-bit GPUVM support for Vega/RV - ECC support for Vega - Resizeable BAR support - Multi-display sync support - Enable swapout for reserved BOs during allocation - S3 fixes on Raven - GPU reset cleanup and fixes - 2+1 level GPU page table amdkfd: - GFX7/8 SDMA user queues support - Hardware scheduling for multiple processes - dGPU prep work rcar: - Added R8A7743/5 support - System suspend/resume support sun4i: - Multi-plane support for YUV formats - A83T and LVDS support msm: - Devfreq support for GPU tegra: - Prep work for adding Tegra186 support - Tegra186 HDMI support - HDMI2.0 and zpos support by using generic helpers tilcdc: - Misc fixes omapdrm: - Support memory bandwidth limits - DSI command mode panel cleanups - DMM error handling exynos: - drop the old IPP subdriver. etnaviv: - Occlusion query fixes - Job handling fixes - Prep work for hooking in gpu scheduler armada: - Move closer to atomic modesetting - Allow disabling primary plane if overlay is full screen imx: - Format modifier support - Add tile prefetch to PRE - Runtime PM support for PRG ast: - fix LUT loading" * tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux: (1471 commits) drm/ast: Load lut in crtc_commit drm: Check for lessee in DROP_MASTER ioctl drm: fix gpu scheduler link order drm/amd/display: Demote error print to debug print when ATOM impl missing dma-buf: fix reservation_object_wait_timeout_rcu once more v2 drm/amdgpu: Avoid leaking PM domain on driver unbind (v2) drm/amd/amdgpu: Add Polaris version check drm/amdgpu: Reenable manual GPU reset from sysfs drm/amdgpu: disable MMHUB power gating on raven drm/ttm: Don't unreserve swapped BOs that were previously reserved drm/ttm: Don't add swapped BOs to swap-LRU list drm/amdgpu: only check for ECC on Vega10 drm/amd/powerplay: Fix smu_table_entry.handle type drm/ttm: add VADDR_FLAG_UPDATED_COUNT to correctly update dma_page global count drm: Fix PANEL_ORIENTATION_QUIRKS breaking the Kconfig DRM menuconfig drm/radeon: fill in rb backend map on evergreen/ni. drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2) drm/ttm: only free pages rather than update global memory count together drm/amdgpu: fix CPU based VM updates drm/amdgpu: fix typo in amdgpu_vce_validate_bo ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-02-01 17:48:47 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-02-01 17:48:47 -0800
commit: 4bf772b14675411a69b3c807f73006de0fe4b649 (patch)
tree: b841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent: 3879ae653a3e98380fe2daf653338830b7ca0097 (diff)
parent: 24b8ef699e8221d2b7f813adaab13eec053e1507 (diff)
1 files changed, 327 insertions, 697 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c8c26f21993c..6fc16eecf2dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -34,52 +34,6 @@
 #include "amdgpu_trace.h"
 
 /*
- * PASID manager
- *
- * PASIDs are global address space identifiers that can be shared
- * between the GPU, an IOMMU and the driver. VMs on different devices
- * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
- */
-static DEFINE_IDA(amdgpu_vm_pasid_ida);
-
-/**
- * amdgpu_vm_alloc_pasid - Allocate a PASID
- * @bits: Maximum width of the PASID in bits, must be at least 1
- *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
- *
- * Returns a positive integer on success. Returns %-EINVAL if bits==0.
- * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
- * memory allocation failure.
- */
-int amdgpu_vm_alloc_pasid(unsigned int bits)
-{
-	int pasid = -EINVAL;
-
-	for (bits = min(bits, 31U); bits > 0; bits--) {
-		pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
-				       1U << (bits - 1), 1U << bits,
-				       GFP_KERNEL);
-		if (pasid != -ENOSPC)
-			break;
-	}
-
-	return pasid;
-}
-
-/**
- * amdgpu_vm_free_pasid - Free a PASID
- * @pasid: PASID to free
- */
-void amdgpu_vm_free_pasid(unsigned int pasid)
-{
-	ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
-}
-
-/*
  * GPUVM
  * GPUVM is similar to the legacy gart on older asics, however
  * rather than there being a single global gart table
@@ -139,6 +93,35 @@ struct amdgpu_prt_cb {
 };
 
 /**
+ * amdgpu_vm_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
+				      unsigned level)
+{
+	unsigned shift = 0xff;
+
+	switch (level) {
+	case AMDGPU_VM_PDB2:
+	case AMDGPU_VM_PDB1:
+	case AMDGPU_VM_PDB0:
+		shift = 9 * (AMDGPU_VM_PDB0 - level) +
+			adev->vm_manager.block_size;
+		break;
+	case AMDGPU_VM_PTB:
+		shift = 0;
+		break;
+	default:
+		dev_err(adev->dev, "the level%d isn't supported.\n", level);
+	}
+
+	return shift;
+}
+
+/**
  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
  *
  * @adev: amdgpu_device pointer
@@ -148,17 +131,18 @@ struct amdgpu_prt_cb {
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 				      unsigned level)
 {
-	if (level == 0)
+	unsigned shift = amdgpu_vm_level_shift(adev,
+					       adev->vm_manager.root_level);
+
+	if (level == adev->vm_manager.root_level)
 		/* For the root directory */
-		return adev->vm_manager.max_pfn >>
-			(adev->vm_manager.block_size *
-			 adev->vm_manager.num_level);
-	else if (level == adev->vm_manager.num_level)
+		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
+	else if (level != AMDGPU_VM_PTB)
+		/* Everything in between */
+		return 512;
+	else
 		/* For the page tables on the leaves */
 		return AMDGPU_VM_PTE_COUNT(adev);
-	else
-		/* Everything in between */
-		return 1 << adev->vm_manager.block_size;
 }
 
 /**
@@ -288,8 +272,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  uint64_t saddr, uint64_t eaddr,
 				  unsigned level)
 {
-	unsigned shift = (adev->vm_manager.num_level - level) *
-		adev->vm_manager.block_size;
+	unsigned shift = amdgpu_vm_level_shift(adev, level);
 	unsigned pt_idx, from, to;
 	int r;
 	u64 flags;
@@ -312,9 +295,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	    to >= amdgpu_vm_num_entries(adev, level))
 		return -EINVAL;
 
-	if (to > parent->last_entry_used)
-		parent->last_entry_used = to;
-
 	++level;
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
@@ -329,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	if (vm->pte_support_ats) {
 		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != adev->vm_manager.num_level - 1)
+		if (level != AMDGPU_VM_PTB)
 			init_value |= AMDGPU_PDE_PTE;
 
 	}
@@ -369,10 +349,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			spin_lock(&vm->status_lock);
 			list_add(&entry->base.vm_status, &vm->relocated);
 			spin_unlock(&vm->status_lock);
-			entry->addr = 0;
 		}
 
-		if (level < adev->vm_manager.num_level) {
+		if (level < AMDGPU_VM_PTB) {
 			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
@@ -418,287 +397,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
-}
-
-/**
- * amdgpu_vm_had_gpu_reset - check if reset occured since last use
- *
- * @adev: amdgpu_device pointer
- * @id: VMID structure
- *
- * Check if GPU reset occured since last use of the VMID.
- */
-static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
-				    struct amdgpu_vm_id *id)
-{
-	return id->current_gpu_reset_count !=
-		atomic_read(&adev->gpu_reset_counter);
-}
-
-static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
-{
-	return !!vm->reserved_vmid[vmhub];
-}
-
-/* idr_mgr->lock must be held */
-static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
-					       struct amdgpu_ring *ring,
-					       struct amdgpu_sync *sync,
-					       struct dma_fence *fence,
-					       struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct dma_fence *updates = sync->last_vm_update;
-	int r = 0;
-	struct dma_fence *flushed, *tmp;
-	bool needs_flush = vm->use_cpu_for_update;
-
-	flushed  = id->flushed_updates;
-	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
-	    (atomic64_read(&id->owner) != vm->client_id) ||
-	    (job->vm_pd_addr != id->pd_gpu_addr) ||
-	    (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) ||
-	    (!id->last_flush || (id->last_flush->context != fence_context &&
-				 !dma_fence_is_signaled(id->last_flush)))) {
-		needs_flush = true;
-		/* to prevent one context starved by another context */
-		id->pd_gpu_addr = 0;
-		tmp = amdgpu_sync_peek_fence(&id->active, ring);
-		if (tmp) {
-			r = amdgpu_sync_fence(adev, sync, tmp);
-			return r;
-		}
-	}
-
-	/* Good we can use this VMID. Remember this submission as
-	* user of the VMID.
-	*/
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-	if (r)
-		goto out;
-
-	if (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) {
-		dma_fence_put(id->flushed_updates);
-		id->flushed_updates = dma_fence_get(updates);
-	}
-	id->pd_gpu_addr = job->vm_pd_addr;
-	atomic64_set(&id->owner, vm->client_id);
-	job->vm_needs_flush = needs_flush;
-	if (needs_flush) {
-		dma_fence_put(id->last_flush);
-		id->last_flush = NULL;
-	}
-	job->vm_id = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
-	return r;
-}
-
-/**
- * amdgpu_vm_grab_id - allocate the next free VMID
- *
- * @vm: vm to allocate id for
- * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
- *
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
- */
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct dma_fence *fence,
-		      struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct dma_fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id, *idle;
-	struct dma_fence **fences;
-	unsigned i;
-	int r = 0;
-
-	mutex_lock(&id_mgr->lock);
-	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
-		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
-		mutex_unlock(&id_mgr->lock);
-		return r;
-	}
-	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-	if (!fences) {
-		mutex_unlock(&id_mgr->lock);
-		return -ENOMEM;
-	}
-	/* Check if we have an idle VMID */
-	i = 0;
-	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
-		if (!fences[i])
-			break;
-		++i;
-	}
-
-	/* If we can't find a idle VMID to use, wait till one becomes available */
-	if (&idle->list == &id_mgr->ids_lru) {
-		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
-		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
-		struct dma_fence_array *array;
-		unsigned j;
-
-		for (j = 0; j < i; ++j)
-			dma_fence_get(fences[j]);
-
-		array = dma_fence_array_create(i, fences, fence_context,
-					   seqno, true);
-		if (!array) {
-			for (j = 0; j < i; ++j)
-				dma_fence_put(fences[j]);
-			kfree(fences);
-			r = -ENOMEM;
-			goto error;
-		}
-
-
-		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
-		dma_fence_put(&array->base);
-		if (r)
-			goto error;
-
-		mutex_unlock(&id_mgr->lock);
-		return 0;
-
-	}
-	kfree(fences);
-
-	job->vm_needs_flush = vm->use_cpu_for_update;
-	/* Check if we can use a VMID already assigned to this VM */
-	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
-		struct dma_fence *flushed;
-		bool needs_flush = vm->use_cpu_for_update;
-
-		/* Check all the prerequisites to using this VMID */
-		if (amdgpu_vm_had_gpu_reset(adev, id))
-			continue;
-
-		if (atomic64_read(&id->owner) != vm->client_id)
-			continue;
-
-		if (job->vm_pd_addr != id->pd_gpu_addr)
-			continue;
-
-		if (!id->last_flush ||
-		    (id->last_flush->context != fence_context &&
-		     !dma_fence_is_signaled(id->last_flush)))
-			needs_flush = true;
-
-		flushed  = id->flushed_updates;
-		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
-			needs_flush = true;
-
-		/* Concurrent flushes are only possible starting with Vega10 */
-		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
-			continue;
-
-		/* Good we can use this VMID. Remember this submission as
-		 * user of the VMID.
-		 */
-		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-		if (r)
-			goto error;
-
-		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
-			dma_fence_put(id->flushed_updates);
-			id->flushed_updates = dma_fence_get(updates);
-		}
-
-		if (needs_flush)
-			goto needs_flush;
-		else
-			goto no_flush_needed;
-
-	};
-
-	/* Still no ID to use? Then use the idle one found earlier */
-	id = idle;
-
-	/* Remember this submission as user of the VMID */
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-	if (r)
-		goto error;
-
-	id->pd_gpu_addr = job->vm_pd_addr;
-	dma_fence_put(id->flushed_updates);
-	id->flushed_updates = dma_fence_get(updates);
-	atomic64_set(&id->owner, vm->client_id);
-
-needs_flush:
-	job->vm_needs_flush = true;
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
-
-no_flush_needed:
-	list_move_tail(&id->list, &id_mgr->ids_lru);
-
-	job->vm_id = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-
-error:
-	mutex_unlock(&id_mgr->lock);
-	return r;
-}
-
-static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
-					  struct amdgpu_vm *vm,
-					  unsigned vmhub)
-{
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub]) {
-		list_add(&vm->reserved_vmid[vmhub]->list,
-			&id_mgr->ids_lru);
-		vm->reserved_vmid[vmhub] = NULL;
-		atomic_dec(&id_mgr->reserved_vmid_num);
-	}
-	mutex_unlock(&id_mgr->lock);
-}
-
-static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
-					 struct amdgpu_vm *vm,
-					 unsigned vmhub)
-{
-	struct amdgpu_vm_id_manager *id_mgr;
-	struct amdgpu_vm_id *idle;
-	int r = 0;
-
-	id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub])
-		goto unlock;
-	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
-	    AMDGPU_VM_MAX_RESERVED_VMID) {
-		DRM_ERROR("Over limitation of reserved vmid\n");
-		atomic_dec(&id_mgr->reserved_vmid_num);
-		r = -EINVAL;
-		goto unlock;
-	}
-	/* Select the first entry VMID */
-	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
-	list_del_init(&idle->list);
-	vm->reserved_vmid[vmhub] = idle;
-	mutex_unlock(&id_mgr->lock);
-
-	return 0;
-unlock:
-	mutex_unlock(&id_mgr->lock);
-	return r;
+	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
+				      adev->vm_manager.root_level);
 }
 
 /**
@@ -715,7 +415,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 
 	has_compute_vm_bug = false;
 
-	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 	if (ip_block) {
 		/* Compute has a VM bug for GFX version < 7.
 		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
@@ -741,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *id;
 	bool gds_switch_needed;
 	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
-	if (job->vm_id == 0)
+	if (job->vmid == 0)
 		return false;
-	id = &id_mgr->ids[job->vm_id];
+	id = &id_mgr->ids[job->vmid];
 	gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -757,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 
-	if (amdgpu_vm_had_gpu_reset(adev, id))
+	if (amdgpu_vmid_had_gpu_reset(adev, id))
 		return true;
 
 	return vm_flush_needed || gds_switch_needed;
@@ -772,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vm_id: vmid number to use
+ * @vmid: vmid number to use
  * @pd_addr: address of the page directory
  *
  * Emit a VM flush when it is necessary.
@@ -781,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -794,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	unsigned patch_offset = 0;
 	int r;
 
-	if (amdgpu_vm_had_gpu_reset(adev, id)) {
+	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
 	}
@@ -811,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
 		struct dma_fence *fence;
 
-		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
-		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
@@ -832,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		id->gws_size = job->gws_size;
 		id->oa_base = job->oa_base;
 		id->oa_size = job->oa_size;
-		amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
+		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
 					    job->gds_size, job->gws_base,
 					    job->gws_size, job->oa_base,
 					    job->oa_size);
@@ -850,49 +550,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 }
 
 /**
- * amdgpu_vm_reset_id - reset VMID to zero
- *
- * @adev: amdgpu device structure
- * @vm_id: vmid number to use
- *
- * Reset saved GDW, GWS and OA to force switch on next flush.
- */
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
-			unsigned vmid)
-{
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
-
-	atomic64_set(&id->owner, 0);
-	id->gds_base = 0;
-	id->gds_size = 0;
-	id->gws_base = 0;
-	id->gws_size = 0;
-	id->oa_base = 0;
-	id->oa_size = 0;
-}
-
-/**
- * amdgpu_vm_reset_all_id - reset VMID to zero
- *
- * @adev: amdgpu device structure
- *
- * Reset VMID to force flush on next use
- */
-void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
-{
-	unsigned i, j;
-
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
-
-		for (j = 1; j < id_mgr->num_ids; ++j)
-			amdgpu_vm_reset_id(adev, i, j);
-	}
-}
-
-/**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
  * @vm: requested vm
@@ -1043,162 +700,52 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @param: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_vm_pt *parent,
+				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *shadow;
-	struct amdgpu_ring *ring = NULL;
+	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
-	unsigned count = 0, pt_idx, ndw = 0;
-	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
-	struct dma_fence *fence = NULL;
-	uint32_t incr;
-
-	int r;
-
-	if (!parent->entries)
-		return 0;
+	uint64_t pde, pt, flags;
+	unsigned level;
 
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	shadow = parent->base.bo->shadow;
+	/* Don't update huge pages here */
+	if (entry->huge)
+		return;
 
 	if (vm->use_cpu_for_update) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-		if (unlikely(r))
-			return r;
-
-		params.func = amdgpu_vm_cpu_set_ptes;
 	} else {
-		ring = container_of(vm->entity.sched, struct amdgpu_ring,
-				    sched);
-
-		/* padding, etc. */
-		ndw = 64;
-
-		/* assume the worst case */
-		ndw += parent->last_entry_used * 6;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-
-		if (shadow) {
+		shadow = parent->base.bo->shadow;
+		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-			ndw *= 2;
-		} else {
-			shadow_addr = 0;
-		}
-
-		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-		if (r)
-			return r;
-
-		params.ib = &job->ibs[0];
-		params.func = amdgpu_vm_do_set_ptes;
-	}
-
-
-	/* walk over the address space and update the directory */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-		struct amdgpu_bo *bo = entry->base.bo;
-		uint64_t pde, pt;
-
-		if (bo == NULL)
-			continue;
-
-		spin_lock(&vm->status_lock);
-		list_del_init(&entry->base.vm_status);
-		spin_unlock(&vm->status_lock);
-
-		pt = amdgpu_bo_gpu_offset(bo);
-		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		/* Don't update huge pages here */
-		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
-		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
-			continue;
-
-		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
-		pde = pd_addr + pt_idx * 8;
-		incr = amdgpu_bo_size(bo);
-		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt) ||
-		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-
-			if (count) {
-				if (shadow)
-					params.func(&params,
-						    last_shadow,
-						    last_pt, count,
-						    incr,
-						    AMDGPU_PTE_VALID);
-
-				params.func(&params, last_pde,
-					    last_pt, count, incr,
-					    AMDGPU_PTE_VALID);
-			}
-
-			count = 1;
-			last_pde = pde;
-			last_shadow = shadow_addr + pt_idx * 8;
-			last_pt = pt;
-		} else {
-			++count;
-		}
-	}
-
-	if (count) {
-		if (vm->root.base.bo->shadow)
-			params.func(&params, last_shadow, last_pt,
-				    count, incr, AMDGPU_PTE_VALID);
-
-		params.func(&params, last_pde, last_pt,
-			    count, incr, AMDGPU_PTE_VALID);
 	}
 
-	if (!vm->use_cpu_for_update) {
-		if (params.ib->length_dw == 0) {
-			amdgpu_job_free(job);
-		} else {
-			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync,
-					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-			if (shadow)
-				amdgpu_sync_resv(adev, &job->sync,
-						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM, false);
-
-			WARN_ON(params.ib->length_dw > ndw);
-			r = amdgpu_job_submit(job, ring, &vm->entity,
-					AMDGPU_FENCE_OWNER_VM, &fence);
-			if (r)
-				goto error_free;
+	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+		pbo = pbo->parent;
 
-			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_update);
-			vm->last_update = fence;
-		}
+	level += params->adev->vm_manager.root_level;
+	pt = amdgpu_bo_gpu_offset(bo);
+	flags = AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
+	if (shadow) {
+		pde = shadow_addr + (entry - parent->entries) * 8;
+		params->func(params, pde, pt, 1, 0, flags);
 	}
 
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	pde = pd_addr + (entry - parent->entries) * 8;
+	params->func(params, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -1208,27 +755,29 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-				       struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
+				       struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent,
+				       unsigned level)
 {
-	unsigned pt_idx;
+	unsigned pt_idx, num_entries;
 
 	/*
 	 * Recurse into the subdirectories. This recursion is harmless because
 	 * we only have a maximum of 5 layers.
 	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+	num_entries = amdgpu_vm_num_entries(adev, level);
+	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
 		if (!entry->base.bo)
 			continue;
 
-		entry->addr = ~0ULL;
 		spin_lock(&vm->status_lock);
 		if (list_empty(&entry->base.vm_status))
 			list_add(&entry->base.vm_status, &vm->relocated);
 		spin_unlock(&vm->status_lock);
-		amdgpu_vm_invalidate_level(vm, entry);
+		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 	}
 }
 
@@ -1244,38 +793,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_job *job;
+	unsigned ndw = 0;
 	int r = 0;
 
+	if (list_empty(&vm->relocated))
+		return 0;
+
+restart:
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+		if (unlikely(r))
+			return r;
+
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		ndw = 512 * 8;
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+		if (r)
+			return r;
+
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
+
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_vm_bo_base *bo_base, *parent;
+		struct amdgpu_vm_pt *pt, *entry;
 		struct amdgpu_bo *bo;
 
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
+		list_del_init(&bo_base->vm_status);
 		spin_unlock(&vm->status_lock);
 
 		bo = bo_base->bo->parent;
-		if (bo) {
-			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt;
-
-			parent = list_first_entry(&bo->va,
-						  struct amdgpu_vm_bo_base,
-						  bo_list);
-			pt = container_of(parent, struct amdgpu_vm_pt, base);
-
-			r = amdgpu_vm_update_level(adev, vm, pt);
-			if (r) {
-				amdgpu_vm_invalidate_level(vm, &vm->root);
-				return r;
-			}
-			spin_lock(&vm->status_lock);
-		} else {
+		if (!bo) {
 			spin_lock(&vm->status_lock);
-			list_del_init(&bo_base->vm_status);
+			continue;
 		}
+
+		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+					  bo_list);
+		pt = container_of(parent, struct amdgpu_vm_pt, base);
+		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+		amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+		spin_lock(&vm->status_lock);
+		if (!vm->use_cpu_for_update &&
+		    (ndw - params.ib->length_dw) < 32)
+			break;
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1283,8 +857,44 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		/* Flush HDP */
 		mb();
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	} else if (params.ib->length_dw == 0) {
+		amdgpu_job_free(job);
+	} else {
+		struct amdgpu_bo *root = vm->root.base.bo;
+		struct amdgpu_ring *ring;
+		struct dma_fence *fence;
+
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
+
+		amdgpu_ring_pad_ib(ring, params.ib);
+		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM, false);
+		if (root->shadow)
+			amdgpu_sync_resv(adev, &job->sync,
+					 root->shadow->tbo.resv,
+					 AMDGPU_FENCE_OWNER_VM, false);
+
+		WARN_ON(params.ib->length_dw > ndw);
+		r = amdgpu_job_submit(job, ring, &vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &fence);
+		if (r)
+			goto error;
+
+		amdgpu_bo_fence(root, fence, true);
+		dma_fence_put(vm->last_update);
+		vm->last_update = fence;
 	}
 
+	if (!list_empty(&vm->relocated))
+		goto restart;
+
+	return 0;
+
+error:
+	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
+				   adev->vm_manager.root_level);
+	amdgpu_job_free(job);
 	return r;
 }
 
@@ -1302,18 +912,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 			 struct amdgpu_vm_pt **entry,
 			 struct amdgpu_vm_pt **parent)
 {
-	unsigned idx, level = p->adev->vm_manager.num_level;
+	unsigned level = p->adev->vm_manager.root_level;
 
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
+		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
+
 		*parent = *entry;
-		*entry = &(*entry)->entries[idx];
+		*entry = &(*entry)->entries[addr >> shift];
+		addr &= (1ULL << shift) - 1;
 	}
 
-	if (level)
+	if (level != AMDGPU_VM_PTB)
 		*entry = NULL;
 }
 
@@ -1335,56 +946,42 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 					unsigned nptes, uint64_t dst,
 					uint64_t flags)
 {
-	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
 	uint64_t pd_addr, pde;
 
 	/* In the case of a mixed PT the PDE must point to it*/
-	if (p->adev->asic_type < CHIP_VEGA10 ||
-	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
-	    p->src ||
-	    !(flags & AMDGPU_PTE_VALID)) {
-
-		dst = amdgpu_bo_gpu_offset(entry->base.bo);
-		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
-		flags = AMDGPU_PTE_VALID;
-	} else {
+	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
+	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
 		/* Set the huge page flag to stop scanning at this PDE */
 		flags |= AMDGPU_PDE_PTE;
 	}
 
-	if (entry->addr == (dst | flags))
+	if (!(flags & AMDGPU_PDE_PTE)) {
+		if (entry->huge) {
+			/* Add the entry to the relocated list to update it. */
+			entry->huge = false;
+			spin_lock(&p->vm->status_lock);
+			list_move(&entry->base.vm_status, &p->vm->relocated);
+			spin_unlock(&p->vm->status_lock);
+		}
 		return;
+	}
 
-	entry->addr = (dst | flags);
-
-	if (use_cpu_update) {
-		/* In case a huge page is replaced with a system
-		 * memory mapping, p->pages_addr != NULL and
-		 * amdgpu_vm_cpu_set_ptes would try to translate dst
-		 * through amdgpu_vm_map_gart. But dst is already a
-		 * GPU address (of the page table). Disable
-		 * amdgpu_vm_map_gart temporarily.
-		 */
-		dma_addr_t *tmp;
-
-		tmp = p->pages_addr;
-		p->pages_addr = NULL;
+	entry->huge = true;
+	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
+			       &dst, &flags);
 
+	if (p->func == amdgpu_vm_cpu_set_ptes) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		pde = pd_addr + (entry - parent->entries) * 8;
-		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
-
-		p->pages_addr = tmp;
 	} else {
 		if (parent->base.bo->shadow) {
 			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
 			pde = pd_addr + (entry - parent->entries) * 8;
-			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+			p->func(p, pde, dst, 1, 0, flags);
 		}
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-		pde = pd_addr + (entry - parent->entries) * 8;
-		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 	}
+	pde = pd_addr + (entry - parent->entries) * 8;
+	p->func(p, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1429,7 +1026,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		amdgpu_vm_handle_huge_pages(params, entry, parent,
 					    nptes, dst, flags);
 		/* We don't need to update PTEs for huge pages */
-		if (entry->addr & AMDGPU_PDE_PTE)
+		if (entry->huge)
 			continue;
 
 		pt = entry->base.bo;
@@ -1588,14 +1185,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
          *
          * The second command is for the shadow pagetables.
 	 */
-	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+	if (vm->root.base.bo->shadow)
+		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+	else
+		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
 
 	/* padding, etc. */
 	ndw = 64;
 
-	/* one PDE write for each huge page */
-	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
-
 	if (pages_addr) {
 		/* copy commands needed */
 		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
@@ -1639,7 +1236,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		addr = 0;
 	}
 
-	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+	r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
 	if (r)
 		goto error_free;
 
@@ -1670,7 +1267,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2081,18 +1677,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->moved)) {
 		struct amdgpu_bo_va *bo_va;
+		struct reservation_object *resv;
 
 		bo_va = list_first_entry(&vm->moved,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
+		resv = bo_va->base.bo->tbo.resv;
+
 		/* Per VM BOs never need to bo cleared in the page tables */
-		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+		if (resv == vm->root.base.bo->tbo.resv)
+			clear = false;
+		/* Try to reserve the BO to avoid clearing its ptes */
+		else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+			clear = false;
+		/* Somebody else is using the BO right now */
+		else
+			clear = true;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
+		if (!clear && resv != vm->root.base.bo->tbo.resv)
+			reservation_object_unlock(resv);
+
 		spin_lock(&vm->status_lock);
 	}
 	spin_unlock(&vm->status_lock);
@@ -2132,8 +1741,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 
-	if (bo)
-		list_add_tail(&bo_va->base.bo_list, &bo->va);
+	if (!bo)
+		return bo_va;
+
+	list_add_tail(&bo_va->base.bo_list, &bo->va);
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return bo_va;
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return bo_va;
+
+	/*
+	 * We checked all the prerequisites, but it looks like this per VM BO
+	 * is currently evicted. add the BO to the evicted list to make sure it
+	 * is validated on next VM use to avoid fault.
+	 * */
+	spin_lock(&vm->status_lock);
+	list_move_tail(&bo_va->base.vm_status, &vm->evicted);
+	spin_unlock(&vm->status_lock);
 
 	return bo_va;
 }
@@ -2556,47 +2183,69 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
 }
 
 /**
- * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
- *
- * @adev: amdgpu_device pointer
- * @fragment_size_default: the default fragment size if it's set auto
- */
-void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
-				 uint32_t fragment_size_default)
-{
-	if (amdgpu_vm_fragment_size == -1)
-		adev->vm_manager.fragment_size = fragment_size_default;
-	else
-		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
-}
-
-/**
  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
  *
  * @adev: amdgpu_device pointer
  * @vm_size: the default vm size if it's set auto
  */
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
-			   uint32_t fragment_size_default)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+			   uint32_t fragment_size_default, unsigned max_level,
+			   unsigned max_bits)
 {
-	/* adjust vm size firstly */
-	if (amdgpu_vm_size == -1)
-		adev->vm_manager.vm_size = vm_size;
-	else
-		adev->vm_manager.vm_size = amdgpu_vm_size;
+	uint64_t tmp;
+
+	/* adjust vm size first */
+	if (amdgpu_vm_size != -1) {
+		unsigned max_size = 1 << (max_bits - 30);
 
-	/* block size depends on vm size */
-	if (amdgpu_vm_block_size == -1)
+		vm_size = amdgpu_vm_size;
+		if (vm_size > max_size) {
+			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
+				 amdgpu_vm_size, max_size);
+			vm_size = max_size;
+		}
+	}
+
+	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
+
+	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
+	if (amdgpu_vm_block_size != -1)
+		tmp >>= amdgpu_vm_block_size - 9;
+	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
+	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+	switch (adev->vm_manager.num_level) {
+	case 3:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
+		break;
+	case 2:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
+		break;
+	case 1:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
+		break;
+	default:
+		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
+	}
+	/* block size depends on vm size and hw setup*/
+	if (amdgpu_vm_block_size != -1)
 		adev->vm_manager.block_size =
-			amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
+			min((unsigned)amdgpu_vm_block_size, max_bits
+			    - AMDGPU_GPU_PAGE_SHIFT
+			    - 9 * adev->vm_manager.num_level);
+	else if (adev->vm_manager.num_level > 1)
+		adev->vm_manager.block_size = 9;
 	else
-		adev->vm_manager.block_size = amdgpu_vm_block_size;
+		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
 
-	amdgpu_vm_set_fragment_size(adev, fragment_size_default);
+	if (amdgpu_vm_fragment_size == -1)
+		adev->vm_manager.fragment_size = fragment_size_default;
+	else
+		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
 
-	DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
-		adev->vm_manager.vm_size, adev->vm_manager.block_size,
-		adev->vm_manager.fragment_size);
+	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+		 vm_size, adev->vm_manager.num_level + 1,
+		 adev->vm_manager.block_size,
+		 adev->vm_manager.fragment_size);
 }
 
 /**
@@ -2615,13 +2264,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	int r, i;
 	u64 flags;
 	uint64_t init_pde_value = 0;
 
 	vm->va = RB_ROOT_CACHED;
-	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
@@ -2635,9 +2283,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
 	ring_instance %= adev->vm_manager.vm_pte_num_rings;
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
-	r = amd_sched_entity_init(&ring->sched, &vm->entity,
-				  rq, amdgpu_sched_jobs);
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+	r = drm_sched_entity_init(&ring->sched, &vm->entity,
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r)
 		return r;
 
@@ -2670,7 +2318,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
+	r = amdgpu_bo_create(adev,
+			     amdgpu_vm_bo_size(adev, adev->vm_manager.root_level),
+			     align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
 			     NULL, NULL, init_pde_value, &vm->root.base.bo);
@@ -2716,7 +2366,7 @@ error_free_root:
 	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
-	amd_sched_entity_fini(&ring->sched, &vm->entity);
+	drm_sched_entity_fini(&ring->sched, &vm->entity);
 
 	return r;
 }
@@ -2724,26 +2374,31 @@ error_free_sched_entity:
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
- * @level: PD/PT starting level to free
+ * @adev: amdgpu device structure
+ * @parent: PD/PT starting level to free
+ * @level: level of parent structure
  *
  * Free the page directory or page table level and all sub levels.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
+				  struct amdgpu_vm_pt *parent,
+				  unsigned level)
 {
-	unsigned i;
+	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
 
-	if (level->base.bo) {
-		list_del(&level->base.bo_list);
-		list_del(&level->base.vm_status);
-		amdgpu_bo_unref(&level->base.bo->shadow);
-		amdgpu_bo_unref(&level->base.bo);
+	if (parent->base.bo) {
+		list_del(&parent->base.bo_list);
+		list_del(&parent->base.vm_status);
+		amdgpu_bo_unref(&parent->base.bo->shadow);
+		amdgpu_bo_unref(&parent->base.bo);
 	}
 
-	if (level->entries)
-		for (i = 0; i <= level->last_entry_used; i++)
-			amdgpu_vm_free_levels(&level->entries[i]);
+	if (parent->entries)
+		for (i = 0; i < num_entries; i++)
+			amdgpu_vm_free_levels(adev, &parent->entries[i],
+					      level + 1);
 
-	kvfree(level->entries);
+	kvfree(parent->entries);
 }
 
 /**
@@ -2775,7 +2430,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
-	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+	drm_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
@@ -2801,13 +2456,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_levels(&vm->root);
+		amdgpu_vm_free_levels(adev, &vm->root,
+				      adev->vm_manager.root_level);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-		amdgpu_vm_free_reserved_vmid(adev, vm, i);
+		amdgpu_vmid_free_reserved(adev, vm, i);
 }
 
 /**
@@ -2826,17 +2482,21 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 
 	spin_lock(&adev->vm_manager.pasid_lock);
 	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-	spin_unlock(&adev->vm_manager.pasid_lock);
-	if (!vm)
+	if (!vm) {
 		/* VM not found, can't track fault credit */
+		spin_unlock(&adev->vm_manager.pasid_lock);
 		return true;
+	}
 
 	/* No lock needed. only accessed by IRQ handler */
-	if (!vm->fault_credit)
+	if (!vm->fault_credit) {
 		/* Too many faults in this VM */
+		spin_unlock(&adev->vm_manager.pasid_lock);
 		return false;
+	}
 
 	vm->fault_credit--;
+	spin_unlock(&adev->vm_manager.pasid_lock);
 	return true;
 }
 
@@ -2849,23 +2509,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
  */
 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
-	unsigned i, j;
-
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
-
-		mutex_init(&id_mgr->lock);
-		INIT_LIST_HEAD(&id_mgr->ids_lru);
-		atomic_set(&id_mgr->reserved_vmid_num, 0);
+	unsigned i;
 
-		/* skip over VMID 0, since it is the system VM */
-		for (j = 1; j < id_mgr->num_ids; ++j) {
-			amdgpu_vm_reset_id(adev, i, j);
-			amdgpu_sync_create(&id_mgr->ids[i].active);
-			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
-		}
-	}
+	amdgpu_vmid_mgr_init(adev);
 
 	adev->vm_manager.fence_context =
 		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
@@ -2873,7 +2519,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 		adev->vm_manager.seqno[i] = 0;
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
-	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
 
@@ -2906,24 +2551,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	unsigned i, j;
-
 	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
 	idr_destroy(&adev->vm_manager.pasid_idr);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
-
-		mutex_destroy(&id_mgr->lock);
-		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
-			struct amdgpu_vm_id *id = &id_mgr->ids[j];
-
-			amdgpu_sync_free(&id->active);
-			dma_fence_put(id->flushed_updates);
-			dma_fence_put(id->last_flush);
-		}
-	}
+	amdgpu_vmid_mgr_fini(adev);
 }
 
 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -2936,13 +2567,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	switch (args->in.op) {
 	case AMDGPU_VM_OP_RESERVE_VMID:
 		/* current, we only have requirement to reserve vmid from gfxhub */
-		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
-						  AMDGPU_GFXHUB);
+		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
 		if (r)
 			return r;
 		break;
 	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
+		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
 		break;
 	default:
 		return -EINVAL;
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-02-01 17:48:47 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-02-01 17:48:47 -0800
commit	4bf772b14675411a69b3c807f73006de0fe4b649 (patch)
tree	b841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent	3879ae653a3e98380fe2daf653338830b7ca0097 (diff)
parent	24b8ef699e8221d2b7f813adaab13eec053e1507 (diff)