summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaíra Canal <mcanal@igalia.com>2023-11-30 13:40:40 -0300
committerMaíra Canal <mcanal@igalia.com>2023-12-01 09:47:36 -0300
commit209e8d2695ee7a67a5b0487bbd1aa75e290d0f41 (patch)
tree0fae1c2818096ad43ae26a4794ac2c982468037c
parentbae7cb5d68001a8d4ceec5964dda74bb9aab7220 (diff)
drm/v3d: Create a CPU job extension for the copy performance query job
A CPU job is a type of job that performs operations that requires CPU intervention. A copy performance query job is a job that copy the complete or partial result of a query to a buffer. In order to copy the result of a performance query to a buffer, we need to get the values from the performance monitors. So, create a user extension for the CPU job that enables the creation of a copy performance query job. This user extension will allow the creation of a CPU job that copy the results of a performance query to a BO with the possibility to indicate the availability with a availability bit. Signed-off-by: Maíra Canal <mcanal@igalia.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-19-mcanal@igalia.com
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h1
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c65
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c82
-rw-r--r--include/uapi/drm/v3d_drm.h50
4 files changed, 198 insertions, 0 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 0f7f80ad8d88..3c7d58866570 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -322,6 +322,7 @@ enum v3d_cpu_job_type {
V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
+ V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY,
};
struct v3d_timestamp_query {
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index e83d2907bc83..54015ad765c7 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -452,12 +452,77 @@ v3d_reset_performance_queries(struct v3d_cpu_job *job)
}
}
+static void
+v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query)
+{
+ struct v3d_performance_query_info *performance_query = &job->performance_query;
+ struct v3d_copy_query_results_info *copy = &job->copy;
+ struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
+ struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_perfmon *perfmon;
+ u64 counter_values[V3D_PERFCNT_NUM];
+
+ for (int i = 0; i < performance_query->nperfmons; i++) {
+ perfmon = v3d_perfmon_find(v3d_priv,
+ performance_query->queries[query].kperfmon_ids[i]);
+ if (!perfmon) {
+ DRM_DEBUG("Failed to find perfmon.");
+ continue;
+ }
+
+ v3d_perfmon_stop(v3d, perfmon, true);
+
+ memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values,
+ perfmon->ncounters * sizeof(u64));
+
+ v3d_perfmon_put(perfmon);
+ }
+
+ for (int i = 0; i < performance_query->ncounters; i++)
+ write_to_buffer(data, i, copy->do_64bit, counter_values[i]);
+}
+
+static void
+v3d_copy_performance_query(struct v3d_cpu_job *job)
+{
+ struct v3d_performance_query_info *performance_query = &job->performance_query;
+ struct v3d_copy_query_results_info *copy = &job->copy;
+ struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+ struct dma_fence *fence;
+ bool available, write_result;
+ u8 *data;
+
+ v3d_get_bo_vaddr(bo);
+
+ data = ((u8 *)bo->vaddr) + copy->offset;
+
+ for (int i = 0; i < performance_query->count; i++) {
+ fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
+ available = fence ? dma_fence_is_signaled(fence) : false;
+
+ write_result = available || copy->do_partial;
+ if (write_result)
+ v3d_write_performance_query_result(job, data, i);
+
+ if (copy->availability_bit)
+ write_to_buffer(data, performance_query->ncounters,
+ copy->do_64bit, available ? 1u : 0u);
+
+ data += copy->stride;
+
+ dma_fence_put(fence);
+ }
+
+ v3d_put_bo_vaddr(bo);
+}
+
static const v3d_cpu_job_fn cpu_job_function[] = {
[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
+ [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
};
static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 20af8ae14831..d7a9da2484fd 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -672,6 +672,84 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
return 0;
}
+static int
+v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
+ struct drm_v3d_extension __user *ext,
+ struct v3d_cpu_job *job)
+{
+ u32 __user *syncs;
+ u64 __user *kperfmon_ids;
+ struct drm_v3d_copy_performance_query copy;
+
+ if (!job) {
+ DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+ return -EINVAL;
+ }
+
+ if (job->job_type) {
+ DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&copy, ext, sizeof(copy)))
+ return -EFAULT;
+
+ if (copy.pad)
+ return -EINVAL;
+
+ job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;
+
+ job->performance_query.queries = kvmalloc_array(copy.count,
+ sizeof(struct v3d_performance_query),
+ GFP_KERNEL);
+ if (!job->performance_query.queries)
+ return -ENOMEM;
+
+ syncs = u64_to_user_ptr(copy.syncs);
+ kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);
+
+ for (int i = 0; i < copy.count; i++) {
+ u32 sync;
+ u64 ids;
+ u32 __user *ids_pointer;
+ u32 id;
+
+ if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+ kvfree(job->performance_query.queries);
+ return -EFAULT;
+ }
+
+ job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+
+ if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
+ kvfree(job->performance_query.queries);
+ return -EFAULT;
+ }
+
+ ids_pointer = u64_to_user_ptr(ids);
+
+ for (int j = 0; j < copy.nperfmons; j++) {
+ if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
+ kvfree(job->performance_query.queries);
+ return -EFAULT;
+ }
+
+ job->performance_query.queries[i].kperfmon_ids[j] = id;
+ }
+ }
+ job->performance_query.count = copy.count;
+ job->performance_query.nperfmons = copy.nperfmons;
+ job->performance_query.ncounters = copy.ncounters;
+
+ job->copy.do_64bit = copy.do_64bit;
+ job->copy.do_partial = copy.do_partial;
+ job->copy.availability_bit = copy.availability_bit;
+ job->copy.offset = copy.offset;
+ job->copy.stride = copy.stride;
+
+ return 0;
+}
+
/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
* according to the extension id (name).
*/
@@ -712,6 +790,9 @@ v3d_get_extensions(struct drm_file *file_priv,
case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
break;
+ case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
+ ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
+ break;
default:
DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
return -EINVAL;
@@ -1092,6 +1173,7 @@ static const unsigned int cpu_job_bo_handle_count[] = {
[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
+ [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};
/**
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index d609bdf29fd6..dce1835eced4 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -77,6 +77,7 @@ struct drm_v3d_extension {
#define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04
#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05
#define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06
+#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY 0x07
__u32 flags; /* mbz */
};
@@ -519,6 +520,52 @@ struct drm_v3d_reset_performance_query {
__u64 kperfmon_ids;
};
+/**
+ * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to copy
+ * performance query results to a buffer
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it
+ * points to this extension to define a copy performance query submission. This
+ * CPU job will copy the performance queries results to a BO with the offset
+ * and stride defined in the extension.
+ */
+struct drm_v3d_copy_performance_query {
+ struct drm_v3d_extension base;
+
+ /* Define if should write to buffer using 64 or 32 bits */
+ __u8 do_64bit;
+
+ /* Define if it can write to buffer even if the query is not available */
+ __u8 do_partial;
+
+ /* Define if it should write availability bit to buffer */
+ __u8 availability_bit;
+
+ /* mbz */
+ __u8 pad;
+
+ /* Offset of the buffer in the BO */
+ __u32 offset;
+
+ /* Stride of the buffer in the BO */
+ __u32 stride;
+
+ /* Number of performance monitors */
+ __u32 nperfmons;
+
+ /* Number of performance counters related to this query pool */
+ __u32 ncounters;
+
+ /* Number of queries */
+ __u32 count;
+
+ /* Array of performance queries's syncobjs to indicate its availability */
+ __u64 syncs;
+
+ /* Array of u64 user-pointers that point to an array of kperfmon_ids */
+ __u64 kperfmon_ids;
+};
+
struct drm_v3d_submit_cpu {
/* Pointer to a u32 array of the BOs that are referenced by the job.
*
@@ -537,6 +584,9 @@ struct drm_v3d_submit_cpu {
*
* For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no
* BOs.
+ *
+ * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one
+ * BO, where the performance queries will be written.
*/
__u64 bo_handles;