summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2020-04-03 06:09:51 +1000
committerDave Airlie <airlied@redhat.com>2020-04-03 09:07:49 +1000
commit0e7e6198af28c1573267aba1be33dd0b7fb35691 (patch)
treeec51021f5684005c7d6358b81ab7de690a3834e0
parent59e7a8cc2dcf335116d500d684bfb34d1d97a6fe (diff)
parent9431042dbc8ce490d49c7f9d5142805b6249208b (diff)
Merge branch 'ttm-transhuge' of git://people.freedesktop.org/~thomash/linux into drm-nextdrm-next-2020-04-03-1drm-next-2020-04-03
Huge page-table entries for TTM In order to reduce CPU usage [1] and in theory TLB misses this patchset enables huge- and giant page-table entries for TTM and TTM-enabled graphics drivers. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Thomas Hellstrom (VMware) <thomas_os@shipmail.org> Link: https://patchwork.freedesktop.org/patch/msgid/20200325073102.6129-1-thomas_os@shipmail.org
-rw-r--r--drivers/gpu/drm/drm_file.c141
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c161
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c13
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h12
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c76
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_thp.c166
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c5
-rw-r--r--include/drm/drm_file.h9
-rw-r--r--include/drm/ttm/ttm_bo_api.h3
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/huge_mm.h41
-rw-r--r--include/linux/mm.h17
-rw-r--r--mm/huge_memory.c44
-rw-r--r--mm/memory.c27
16 files changed, 692 insertions, 28 deletions
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index c4c704e01961..eb009d3ab48f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -48,6 +48,11 @@
#include "drm_internal.h"
#include "drm_legacy.h"
+#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#include <uapi/asm/mman.h>
+#include <drm/drm_vma_manager.h>
+#endif
+
/* from BKL pushdown */
DEFINE_MUTEX(drm_global_mutex);
@@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
return file;
}
EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
+
+#ifdef CONFIG_MMU
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * drm_addr_inflate() attempts to construct an aligned area by inflating
+ * the area size and skipping the unaligned start of the area.
+ * adapted from shmem_get_unmapped_area()
+ */
+static unsigned long drm_addr_inflate(unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags,
+ unsigned long huge_size)
+{
+ unsigned long offset, inflated_len;
+ unsigned long inflated_addr;
+ unsigned long inflated_offset;
+
+ offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
+ if (offset && offset + len < 2 * huge_size)
+ return addr;
+ if ((addr & (huge_size - 1)) == offset)
+ return addr;
+
+ inflated_len = len + huge_size - PAGE_SIZE;
+ if (inflated_len > TASK_SIZE)
+ return addr;
+ if (inflated_len < len)
+ return addr;
+
+ inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
+ 0, flags);
+ if (IS_ERR_VALUE(inflated_addr))
+ return addr;
+ if (inflated_addr & ~PAGE_MASK)
+ return addr;
+
+ inflated_offset = inflated_addr & (huge_size - 1);
+ inflated_addr += offset - inflated_offset;
+ if (inflated_offset > offset)
+ inflated_addr += huge_size;
+
+ if (inflated_addr > TASK_SIZE - len)
+ return addr;
+
+ return inflated_addr;
+}
+
+/**
+ * drm_get_unmapped_area() - Get an unused user-space virtual memory area
+ * suitable for huge page table entries.
+ * @file: The struct file representing the address space being mmap()'d.
+ * @uaddr: Start address suggested by user-space.
+ * @len: Length of the area.
+ * @pgoff: The page offset into the address space.
+ * @flags: mmap flags
+ * @mgr: The address space manager used by the drm driver. This argument can
+ * probably be removed at some point when all drivers use the same
+ * address space manager.
+ *
+ * This function attempts to find an unused user-space virtual memory area
+ * that can accommodate the size we want to map, and that is properly
+ * aligned to facilitate huge page table entries matching actual
+ * huge pages or huge page aligned memory in buffer objects. Buffer objects
+ * are assumed to start at huge page boundary pfns (io memory) or be
+ * populated by huge pages aligned to the start of the buffer object
+ * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
+ *
+ * Return: aligned user-space address.
+ */
+unsigned long drm_get_unmapped_area(struct file *file,
+ unsigned long uaddr, unsigned long len,
+ unsigned long pgoff, unsigned long flags,
+ struct drm_vma_offset_manager *mgr)
+{
+ unsigned long addr;
+ unsigned long inflated_addr;
+ struct drm_vma_offset_node *node;
+
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ /*
+ * @pgoff is the file page-offset the huge page boundaries of
+ * which typically aligns to physical address huge page boundaries.
+ * That's not true for DRM, however, where physical address huge
+ * page boundaries instead are aligned with the offset from
+ * buffer object start. So adjust @pgoff to be the offset from
+ * buffer object start.
+ */
+ drm_vma_offset_lock_lookup(mgr);
+ node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
+ if (node)
+ pgoff -= node->vm_node.start;
+ drm_vma_offset_unlock_lookup(mgr);
+
+ addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ if (addr & ~PAGE_MASK)
+ return addr;
+ if (addr > TASK_SIZE - len)
+ return addr;
+
+ if (len < HPAGE_PMD_SIZE)
+ return addr;
+ if (flags & MAP_FIXED)
+ return addr;
+ /*
+ * Our priority is to support MAP_SHARED mapped hugely;
+ * and support MAP_PRIVATE mapped hugely too, until it is COWed.
+ * But if caller specified an address hint, respect that as before.
+ */
+ if (uaddr)
+ return addr;
+
+ inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
+ HPAGE_PMD_SIZE);
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+ len >= HPAGE_PUD_SIZE)
+ inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
+ flags, HPAGE_PUD_SIZE);
+ return inflated_addr;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+unsigned long drm_get_unmapped_area(struct file *file,
+ unsigned long uaddr, unsigned long len,
+ unsigned long pgoff, unsigned long flags,
+ struct drm_vma_offset_manager *mgr)
+{
+ return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
+#endif /* CONFIG_MMU */
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 389128b8c4dd..0af14835504c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/**
+ * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
+ * @vmf: Fault data
+ * @bo: The buffer object
+ * @page_offset: Page offset from bo start
+ * @fault_page_size: The size of the fault in pages.
+ * @pgprot: The page protections.
+ * Does additional checking whether it's possible to insert a PUD or PMD
+ * pfn and performs the insertion.
+ *
+ * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
+ * a huge fault was not possible, or on insertion error.
+ */
+static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+ struct ttm_buffer_object *bo,
+ pgoff_t page_offset,
+ pgoff_t fault_page_size,
+ pgprot_t pgprot)
+{
+ pgoff_t i;
+ vm_fault_t ret;
+ unsigned long pfn;
+ pfn_t pfnt;
+ struct ttm_tt *ttm = bo->ttm;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+ /* Fault should not cross bo boundary. */
+ page_offset &= ~(fault_page_size - 1);
+ if (page_offset + fault_page_size > bo->num_pages)
+ goto out_fallback;
+
+ if (bo->mem.bus.is_iomem)
+ pfn = ttm_bo_io_mem_pfn(bo, page_offset);
+ else
+ pfn = page_to_pfn(ttm->pages[page_offset]);
+
+ /* pfn must be fault_page_size aligned. */
+ if ((pfn & (fault_page_size - 1)) != 0)
+ goto out_fallback;
+
+ /* Check that memory is contiguous. */
+ if (!bo->mem.bus.is_iomem) {
+ for (i = 1; i < fault_page_size; ++i) {
+ if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
+ goto out_fallback;
+ }
+ } else if (bo->bdev->driver->io_mem_pfn) {
+ for (i = 1; i < fault_page_size; ++i) {
+ if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
+ goto out_fallback;
+ }
+ }
+
+ pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
+ if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
+ ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
+ ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
+#endif
+ else
+ WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
+
+ if (ret != VM_FAULT_NOPAGE)
+ goto out_fallback;
+
+ return VM_FAULT_NOPAGE;
+out_fallback:
+ count_vm_event(THP_FAULT_FALLBACK);
+ return VM_FAULT_FALLBACK;
+}
+#else
+static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+ struct ttm_buffer_object *bo,
+ pgoff_t page_offset,
+ pgoff_t fault_page_size,
+ pgprot_t pgprot)
+{
+ return VM_FAULT_FALLBACK;
+}
+#endif
+
/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
@@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
* @num_prefault: Maximum number of prefault pages. The caller may want to
* specify this based on madvice settings and the size of the GPU object
* backed by the memory.
+ * @fault_page_size: The size of the fault in pages.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
@@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
- pgoff_t num_prefault)
+ pgoff_t num_prefault,
+ pgoff_t fault_page_size)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
prot = pgprot_decrypted(prot);
}
+ /* We don't prefault on huge faults. Yet. */
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) {
+ ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset,
+ fault_page_size, prot);
+ goto out_io_unlock;
+ }
+
/*
* Speculatively prefault a number of pages. Only error on
* first page.
@@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return ret;
prot = vma->vm_page_prot;
- ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
@@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(ttm_bo_vm_fault);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/**
+ * ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting?
+ * @prot: The page protection value
+ *
+ * Return: true if @prot is write-protecting. false otherwise.
+ */
+static bool ttm_pgprot_is_wrprotecting(pgprot_t prot)
+{
+ /*
+ * This is meant to say "pgprot_wrprotect(prot) == prot" in a generic
+ * way. Unfortunately there is no generic pgprot_wrprotect.
+ */
+ return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) ==
+ pgprot_val(prot);
+}
+
+static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ pgprot_t prot;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ vm_fault_t ret;
+ pgoff_t fault_page_size = 0;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+ switch (pe_size) {
+ case PE_SIZE_PMD:
+ fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
+ break;
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ case PE_SIZE_PUD:
+ fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ return VM_FAULT_FALLBACK;
+ }
+
+ /* Fallback on write dirty-tracking or COW */
+ if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot))
+ return VM_FAULT_FALLBACK;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+ dma_resv_unlock(bo->base.resv);
+
+ return ret;
+}
+#endif
+
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
- .access = ttm_bo_vm_access
+ .access = ttm_bo_vm_access,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ .huge_fault = ttm_bo_vm_huge_fault,
+#endif
};
static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 5c3515e8cce1..31f85f09f1fc 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
ttm_object.o ttm_lock.o
+vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 71e45b568511..c2247a893ed4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
+static unsigned long
+vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+
+ return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
+ &dev_priv->vma_manager);
+}
+
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
void *ptr)
{
@@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = {
.compat_ioctl = vmw_compat_ioctl,
#endif
.llseek = noop_llseek,
+ .get_unmapped_area = vmw_get_unmapped_area,
};
static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 5ddbcb9f6df4..8cdcd6e5f9e1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1000,6 +1000,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
size_t gran);
+
/**
* TTM buffer object driver - vmwgfx_ttm_buffer.c
*/
@@ -1510,6 +1511,17 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end);
vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size);
+#endif
+
+/* Transparent hugepage support - vmwgfx_thp.c */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern const struct ttm_mem_type_manager_func vmw_thp_func;
+#else
+#define vmw_thp_func ttm_bo_manager_func
+#endif
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 60cfbfadd3f2..d4d66532f9c9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -473,11 +473,11 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
* a lot of unnecessary write faults.
*/
if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
- prot = vma->vm_page_prot;
+ prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
else
prot = vm_get_page_prot(vma->vm_flags);
- ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
@@ -486,3 +486,75 @@ out_unlock:
return ret;
}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, struct vmw_buffer_object, base);
+ pgprot_t prot;
+ vm_fault_t ret;
+ pgoff_t fault_page_size;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+ bool is_cow_mapping =
+ (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+
+ switch (pe_size) {
+ case PE_SIZE_PMD:
+ fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
+ break;
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ case PE_SIZE_PUD:
+ fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ return VM_FAULT_FALLBACK;
+ }
+
+ /* Always do write dirty-tracking and COW on PTE level. */
+ if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
+ return VM_FAULT_FALLBACK;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (vbo->dirty) {
+ pgoff_t allowed_prefault;
+ unsigned long page_offset;
+
+ page_offset = vmf->pgoff -
+ drm_vma_node_start(&bo->base.vma_node);
+ if (page_offset >= bo->num_pages ||
+ vmw_resources_clean(vbo, page_offset,
+ page_offset + PAGE_SIZE,
+ &allowed_prefault)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ /*
+ * Write protect, so we get a new fault on write, and can
+ * split.
+ */
+ prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
+ } else {
+ prot = vm_get_page_prot(vma->vm_flags);
+ }
+
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+
+ return ret;
+}
+#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
new file mode 100644
index 000000000000..b7c816ba7166
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Huge page-table-entry support for IO memory.
+ *
+ * Copyright (C) 2007-2019 Vmware, Inc. All rights reservedd.
+ */
+#include "vmwgfx_drv.h"
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+
+/**
+ * struct vmw_thp_manager - Range manager implementing huge page alignment
+ *
+ * @mm: The underlying range manager. Protected by @lock.
+ * @lock: Manager lock.
+ */
+struct vmw_thp_manager {
+ struct drm_mm mm;
+ spinlock_t lock;
+};
+
+static int vmw_thp_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node,
+ unsigned long align_pages,
+ const struct ttm_place *place,
+ struct ttm_mem_reg *mem,
+ unsigned long lpfn,
+ enum drm_mm_insert_mode mode)
+{
+ if (align_pages >= mem->page_alignment &&
+ (!mem->page_alignment || align_pages % mem->page_alignment == 0)) {
+ return drm_mm_insert_node_in_range(mm, node,
+ mem->num_pages,
+ align_pages, 0,
+ place->fpfn, lpfn, mode);
+ }
+
+ return -ENOSPC;
+}
+
+static int vmw_thp_get_node(struct ttm_mem_type_manager *man,
+ struct ttm_buffer_object *bo,
+ const struct ttm_place *place,
+ struct ttm_mem_reg *mem)
+{
+ struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
+ struct drm_mm *mm = &rman->mm;
+ struct drm_mm_node *node;
+ unsigned long align_pages;
+ unsigned long lpfn;
+ enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
+ int ret;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ lpfn = place->lpfn;
+ if (!lpfn)
+ lpfn = man->size;
+
+ mode = DRM_MM_INSERT_BEST;
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ mode = DRM_MM_INSERT_HIGH;
+
+ spin_lock(&rman->lock);
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
+ align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
+ if (mem->num_pages >= align_pages) {
+ ret = vmw_thp_insert_aligned(mm, node, align_pages,
+ place, mem, lpfn, mode);
+ if (!ret)
+ goto found_unlock;
+ }
+ }
+
+ align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
+ if (mem->num_pages >= align_pages) {
+ ret = vmw_thp_insert_aligned(mm, node, align_pages, place, mem,
+ lpfn, mode);
+ if (!ret)
+ goto found_unlock;
+ }
+
+ ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
+ mem->page_alignment, 0,
+ place->fpfn, lpfn, mode);
+found_unlock:
+ spin_unlock(&rman->lock);
+
+ if (unlikely(ret)) {
+ kfree(node);
+ } else {
+ mem->mm_node = node;
+ mem->start = node->start;
+ }
+
+ return 0;
+}
+
+
+
+static void vmw_thp_put_node(struct ttm_mem_type_manager *man,
+ struct ttm_mem_reg *mem)
+{
+ struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
+
+ if (mem->mm_node) {
+ spin_lock(&rman->lock);
+ drm_mm_remove_node(mem->mm_node);
+ spin_unlock(&rman->lock);
+
+ kfree(mem->mm_node);
+ mem->mm_node = NULL;
+ }
+}
+
+static int vmw_thp_init(struct ttm_mem_type_manager *man,
+ unsigned long p_size)
+{
+ struct vmw_thp_manager *rman;
+
+ rman = kzalloc(sizeof(*rman), GFP_KERNEL);
+ if (!rman)
+ return -ENOMEM;
+
+ drm_mm_init(&rman->mm, 0, p_size);
+ spin_lock_init(&rman->lock);
+ man->priv = rman;
+ return 0;
+}
+
+static int vmw_thp_takedown(struct ttm_mem_type_manager *man)
+{
+ struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
+ struct drm_mm *mm = &rman->mm;
+
+ spin_lock(&rman->lock);
+ if (drm_mm_clean(mm)) {
+ drm_mm_takedown(mm);
+ spin_unlock(&rman->lock);
+ kfree(rman);
+ man->priv = NULL;
+ return 0;
+ }
+ spin_unlock(&rman->lock);
+ return -EBUSY;
+}
+
+static void vmw_thp_debug(struct ttm_mem_type_manager *man,
+ struct drm_printer *printer)
+{
+ struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
+
+ spin_lock(&rman->lock);
+ drm_mm_print(&rman->mm, printer);
+ spin_unlock(&rman->lock);
+}
+
+const struct ttm_mem_type_manager_func vmw_thp_func = {
+ .init = vmw_thp_init,
+ .takedown = vmw_thp_takedown,
+ .get_node = vmw_thp_get_node,
+ .put_node = vmw_thp_put_node,
+ .debug = vmw_thp_debug
+};
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 3f3b2c7a208a..bf0bc4697959 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -749,7 +749,7 @@ static int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
break;
case TTM_PL_VRAM:
/* "On-card" video ram */
- man->func = &ttm_bo_manager_func;
+ man->func = &vmw_thp_func;
man->gpu_offset = 0;
man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_FLAG_CACHED;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index aa7e50f63b94..3c03b1746661 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -34,7 +34,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
.page_mkwrite = vmw_bo_vm_mkwrite,
.fault = vmw_bo_vm_fault,
.open = ttm_bo_vm_open,
- .close = ttm_bo_vm_close
+ .close = ttm_bo_vm_close,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ .huge_fault = vmw_bo_vm_huge_fault,
+#endif
};
struct drm_file *file_priv = filp->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 19df8028a6c4..5aaf1c4593a9 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -391,4 +391,13 @@ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e);
struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags);
+#ifdef CONFIG_MMU
+struct drm_vma_offset_manager;
+unsigned long drm_get_unmapped_area(struct file *file,
+ unsigned long uaddr, unsigned long len,
+ unsigned long pgoff, unsigned long flags,
+ struct drm_vma_offset_manager *mgr);
+#endif /* CONFIG_MMU */
+
+
#endif /* _DRM_FILE_H_ */
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index b9bc1b00142e..0a9d042e075a 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -727,7 +727,8 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
- pgoff_t num_prefault);
+ pgoff_t num_prefault,
+ pgoff_t fault_page_size);
vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index abedbffe2c9e..aff0c9524ff5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3392,7 +3392,7 @@ static inline bool io_is_direct(struct file *filp)
return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
}
-static inline bool vma_is_dax(struct vm_area_struct *vma)
+static inline bool vma_is_dax(const struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5aca3d1bdb32..f63b0882c1b3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -47,8 +47,45 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
-vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
+ pgprot_t pgprot, bool write);
+
+/**
+ * vmf_insert_pfn_pmd - insert a pmd size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pmd size pfn. See vmf_insert_pfn() for additional info.
+ *
+ * Return: vm_fault_t value.
+ */
+static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn,
+ bool write)
+{
+ return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
+}
+vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
+ pgprot_t pgprot, bool write);
+
+/**
+ * vmf_insert_pfn_pud - insert a pud size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pud size pfn. See vmf_insert_pfn() for additional info.
+ *
+ * Return: vm_fault_t value.
+ */
+static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
+ bool write)
+{
+ return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
+}
+
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..bdd79a72bb42 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2867,6 +2867,23 @@ extern long copy_huge_page_from_user(struct page *dst_page,
const void __user *usr_src,
unsigned int pages_per_huge_page,
bool allow_pagefault);
+
+/**
+ * vma_is_special_huge - Are transhuge page-table entries considered special?
+ * @vma: Pointer to the struct vm_area_struct to consider
+ *
+ * Whether transhuge page-table entries are considered "special" following
+ * the definition in vm_normal_page().
+ *
+ * Return: true if transhuge page-table entries should be considered special,
+ * false otherwise.
+ */
+static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
+{
+ return vma_is_dax(vma) || (vma->vm_file &&
+ (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 24ad53b4dfc0..4036d5e0a6f3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -824,11 +824,24 @@ out_unlock:
pte_free(mm, pgtable);
}
-vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
+/**
+ * vmf_insert_pfn_pmd_prot - insert a pmd size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pmd size pfn. See vmf_insert_pfn() for additional info and
+ * also consult the vmf_insert_mixed_prot() documentation when
+ * @pgprot != @vmf->vma->vm_page_prot.
+ *
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
+ pgprot_t pgprot, bool write)
{
unsigned long addr = vmf->address & PMD_MASK;
struct vm_area_struct *vma = vmf->vma;
- pgprot_t pgprot = vma->vm_page_prot;
pgtable_t pgtable = NULL;
/*
@@ -856,7 +869,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
}
-EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
@@ -902,11 +915,24 @@ out_unlock:
spin_unlock(ptl);
}
-vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
+/**
+ * vmf_insert_pfn_pud_prot - insert a pud size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pud size pfn. See vmf_insert_pfn() for additional info and
+ * also consult the vmf_insert_mixed_prot() documentation when
+ * @pgprot != @vmf->vma->vm_page_prot.
+ *
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
+ pgprot_t pgprot, bool write)
{
unsigned long addr = vmf->address & PUD_MASK;
struct vm_area_struct *vma = vmf->vma;
- pgprot_t pgprot = vma->vm_page_prot;
/*
* If we had pud_special, we could avoid all these restrictions,
@@ -927,7 +953,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
return VM_FAULT_NOPAGE;
}
-EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud_prot);
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -1802,7 +1828,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
- if (vma_is_dax(vma)) {
+ if (vma_is_special_huge(vma)) {
if (arch_needs_pgtable_deposit())
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
@@ -2066,7 +2092,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm);
tlb_remove_pud_tlb_entry(tlb, pud, addr);
- if (vma_is_dax(vma)) {
+ if (vma_is_special_huge(vma)) {
spin_unlock(ptl);
/* No zero page support yet */
} else {
@@ -2175,7 +2201,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
*/
if (arch_needs_pgtable_deposit())
zap_deposited_table(mm, pmd);
- if (vma_is_dax(vma))
+ if (vma_is_special_huge(vma))
return;
page = pmd_page(_pmd);
if (!PageDirty(page) && pmd_dirty(_pmd))
diff --git a/mm/memory.c b/mm/memory.c
index e8bfdf0d9d1d..efa59b1b109c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3951,11 +3951,14 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+ if (vmf->vma->vm_ops->huge_fault) {
+ vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
- /* COW handled on pte level: split pmd */
- VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ }
+
+ /* COW or write-notify handled on pte level: split pmd. */
__split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
return VM_FAULT_FALLBACK;
@@ -3968,12 +3971,20 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
static vm_fault_t create_huge_pud(struct vm_fault *vmf)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+ goto split;
+ if (vmf->vma->vm_ops->huge_fault) {
+ vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ }
+split:
+ /* COW or write-notify not handled on PUD level: split pud.*/
+ __split_huge_pud(vmf->vma, vmf->pud, vmf->address);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
return VM_FAULT_FALLBACK;
}