summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c87
-rw-r--r--include/uapi/linux/kfd_ioctl.h3
8 files changed, 179 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 505d39156acd..2a4612d8437a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
return -EPERM;
}
- process = kfd_create_process(current);
+ process = kfd_create_process(filep);
if (IS_ERR(process))
return PTR_ERR(process);
@@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+ q_properties->ctl_stack_size = args->ctl_stack_size;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -1088,6 +1089,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
KFD_MMAP_EVENTS_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
return kfd_event_mmap(process, vma);
+ } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
+ KFD_MMAP_RESERVED_MEM_MASK) {
+ vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
+ return kfd_reserved_mem_mmap(process, vma);
}
return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 621a3b53a038..4f05eacca786 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -27,6 +27,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
+#include "cwsr_trap_handler_gfx8.asm"
#define MQD_SIZE_ALIGNED 768
@@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = false,
};
static const struct kfd_device_info carrizo_device_info = {
@@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
};
struct kfd_deviceid {
@@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
+static void kfd_cwsr_init(struct kfd_dev *kfd)
+{
+ if (cwsr_enable && kfd->device_info->supports_cwsr) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+
+ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ kfd->cwsr_enabled = true;
+ }
+}
+
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
@@ -286,6 +300,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_iommu_pasid_error;
}
+ kfd_cwsr_init(kfd);
+
if (kfd_resume(kfd))
goto kfd_resume_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e202921c150e..5c065024e285 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -173,6 +173,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
*allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
+
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -846,6 +849,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index f744caeaee04..ee8adf654cd0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -50,6 +50,10 @@ module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 2ba7cea7b99b..00e1f1a9728b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -89,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_iq_rptr = 1;
+ if (q->tba_addr) {
+ m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+ m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+ m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+ m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+ }
+
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
*mqd = m;
if (gart_addr)
*gart_addr = addr;
@@ -167,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ m->cp_hqd_ctx_save_control =
+ atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+ mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 47504737ab4a..a66876467995 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -41,6 +41,7 @@
#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
#define KFD_MMAP_EVENTS_MASK 0x4000000000000
+#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -63,6 +64,15 @@
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
/*
+ * Size of the per-process TBA+TMA buffer: 2 pages
+ *
+ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for daisy changing a user-mode trap handler.
+ */
+#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
+#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+
+/*
* Kernel module parameter to specify maximum number of supported queues per
* device
*/
@@ -78,6 +88,8 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;
+extern int cwsr_enable;
+
/*
* Kernel module parameter to specify whether to send sigterm to HSA process on
* unhandled exception
@@ -131,6 +143,7 @@ struct kfd_device_info {
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
+ bool supports_cwsr;
};
struct kfd_mem_obj {
@@ -200,6 +213,11 @@ struct kfd_dev {
/* Debug manager */
struct kfd_dbgmgr *dbgmgr;
+
+ /* CWSR */
+ bool cwsr_enabled;
+ const void *cwsr_isa;
+ unsigned int cwsr_isa_size;
};
/* KGD2KFD callbacks */
@@ -332,6 +350,9 @@ struct queue_properties {
uint32_t eop_ring_buffer_size;
uint64_t ctx_save_restore_area_address;
uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
/**
@@ -439,6 +460,11 @@ struct qcm_process_device {
uint32_t num_gws;
uint32_t num_oac;
uint32_t sh_hidden_private_base;
+
+ /* CWSR memory */
+ void *cwsr_kaddr;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
@@ -563,7 +589,7 @@ struct amdkfd_ioctl_desc {
void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
@@ -577,6 +603,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+ struct vm_area_struct *vma);
+
/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1bb9b2643d5a..39f4c19aaf61 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -28,6 +28,7 @@
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
+#include <linux/mman.h>
struct mm_struct;
@@ -53,6 +54,8 @@ struct kfd_process_release_work {
static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+
void kfd_process_create_wq(void)
{
@@ -68,9 +71,10 @@ void kfd_process_destroy_wq(void)
}
}
-struct kfd_process *kfd_create_process(const struct task_struct *thread)
+struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
+ struct task_struct *thread = current;
if (!thread->mm)
return ERR_PTR(-EINVAL);
@@ -101,6 +105,8 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
up_write(&thread->mm->mmap_sem);
+ kfd_process_init_cwsr(process, filep);
+
return process;
}
@@ -168,6 +174,11 @@ static void kfd_process_wq_release(struct work_struct *work)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
+
+ if (pdd->qpd.cwsr_kaddr)
+ free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
+ get_order(KFD_CWSR_TBA_TMA_SIZE));
+
kfree(pdd);
}
@@ -260,6 +271,46 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
};
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+{
+ int err = 0;
+ unsigned long offset;
+ struct kfd_process_device *temp, *pdd = NULL;
+ struct kfd_dev *dev = NULL;
+ struct qcm_process_device *qpd = NULL;
+
+ mutex_lock(&p->mutex);
+ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+ per_device_list) {
+ dev = pdd->dev;
+ qpd = &pdd->qpd;
+ if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+ continue;
+ offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+ qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+ MAP_SHARED, offset);
+
+ if (IS_ERR_VALUE(qpd->tba_addr)) {
+ pr_err("Failure to set tba address. error -%d.\n",
+ (int)qpd->tba_addr);
+ err = qpd->tba_addr;
+ qpd->tba_addr = 0;
+ qpd->cwsr_kaddr = NULL;
+ goto out;
+ }
+
+ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+ }
+out:
+ mutex_unlock(&p->mutex);
+ return err;
+}
+
static struct kfd_process *create_process(const struct task_struct *thread)
{
struct kfd_process *process;
@@ -535,3 +586,37 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
return p;
}
+
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+ struct vm_area_struct *vma)
+{
+ struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+ struct kfd_process_device *pdd;
+ struct qcm_process_device *qpd;
+
+ if (!dev)
+ return -EINVAL;
+ if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+ pr_err("Incorrect CWSR mapping size.\n");
+ return -EINVAL;
+ }
+
+ pdd = kfd_get_process_device_data(dev, process);
+ if (!pdd)
+ return -EINVAL;
+ qpd = &pdd->qpd;
+
+ qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(KFD_CWSR_TBA_TMA_SIZE));
+ if (!qpd->cwsr_kaddr) {
+ pr_err("Error allocating per process CWSR buffer.\n");
+ return -ENOMEM;
+ }
+
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+ | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+ /* Mapping pages to user process */
+ return remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(qpd->cwsr_kaddr)),
+ KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 6e80501368ae..f7563ef2e883 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args {
__u64 eop_buffer_address; /* to KFD */
__u64 eop_buffer_size; /* to KFD */
__u64 ctx_save_restore_address; /* to KFD */
- __u64 ctx_save_restore_size; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
};
struct kfd_ioctl_destroy_queue_args {