author     Christian König <christian.koenig@amd.com>   2015-07-06 16:18:24 +0200
committer  Christian König <christian.koenig@amd.com>   2015-07-06 16:18:24 +0200
commit     8d0f83967344b1bdcd849c2b996d9e84b398a219 (patch)
tree       c096071b757fbf22d4c73216fe5844e3656df112
parent     5c2d5a642449d23b1918b929d38774170f8a8f02 (diff)
parent     13a58a5c87d4c01c3bf7fb084506b40170404038 (diff)
Merge branch 'amd-staging-upstream' into amd-mainline
-rw-r--r--  .gitignore                      2
-rw-r--r--  amdgpu/amdgpu.h               427
-rw-r--r--  amdgpu/amdgpu_bo.c            137
-rw-r--r--  amdgpu/amdgpu_cs.c            618
-rw-r--r--  amdgpu/amdgpu_device.c          2
-rw-r--r--  amdgpu/amdgpu_gpu_info.c       31
-rw-r--r--  amdgpu/amdgpu_internal.h       30
-rw-r--r--  include/drm/amdgpu_drm.h      177
-rw-r--r--  tests/amdgpu/amdgpu_test.h     38
-rw-r--r--  tests/amdgpu/basic_tests.c    123
-rw-r--r--  tests/amdgpu/cs_tests.c        36
-rw-r--r--  tests/amdgpu/vce_tests.c       42
12 files changed, 598 insertions, 1065 deletions
diff --git a/.gitignore b/.gitignore
index b8769735..79b2ac15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ libdrm_radeon.pc
libdrm_omap.pc
libdrm_exynos.pc
libdrm_freedreno.pc
+libdrm_amdgpu.pc
libkms.pc
libtool
ltmain.sh
@@ -73,6 +74,7 @@ stamp-h1
tdfx.kld
via.kld
tests/auth
+tests/amdgpu/amdgpu_test
tests/dristat
tests/drmsl
tests/drmstat
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index c6161bd1..9d164309 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -52,18 +52,15 @@ struct drm_amdgpu_info_hw_ip;
#define AMDGPU_CS_MAX_IBS_PER_SUBMIT 4
/**
- *
+ * Special timeout value meaning that the timeout is infinite.
*/
#define AMDGPU_TIMEOUT_INFINITE 0xffffffffffffffffull
/**
- * The special flag to mark that this IB will re-used
- * by client and should not be automatically return back
- * to free pool by libdrm_amdgpu when submission is completed.
- *
- * \sa amdgpu_cs_ib_info
-*/
-#define AMDGPU_CS_REUSE_IB 0x2
+ * Used in amdgpu_cs_query_fence::flags, meaning that the given timeout
+ * is absolute.
+ */
+#define AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE (1 << 0)
/*--------------------------------------------------------------------------*/
/* ----------------------------- Enums ------------------------------------ */
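[Editor's note] The IB-reuse flag goes away because IBs are now ordinary buffer objects owned by the client; in its place, AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE changes how amdgpu_cs_query_fence::timeout_ns is interpreted. A minimal usage sketch, assuming `ctx` and `fence_value` come from an earlier submission; the helper name is illustrative:

#include <errno.h>
#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Sketch: block until a previously submitted GFX fence has signaled. */
static int wait_gfx_fence(amdgpu_context_handle ctx, uint64_t fence_value)
{
        struct amdgpu_cs_query_fence query = {0};
        uint32_t expired = 0;
        int r;

        query.context = ctx;
        query.ip_type = AMDGPU_HW_IP_GFX;
        query.ip_instance = 0;
        query.ring = 0;
        query.fence = fence_value;
        /* Relative timeout; set AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE in
         * query.flags to treat timeout_ns as an absolute deadline instead. */
        query.timeout_ns = AMDGPU_TIMEOUT_INFINITE;

        r = amdgpu_cs_query_fence_status(&query, &expired);
        if (r)
                return r;
        return expired ? 0 : -ETIME;
}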
@@ -86,23 +83,6 @@ enum amdgpu_bo_handle_type {
amdgpu_bo_handle_type_dma_buf_fd = 2
};
-/**
- * For performance reasons and to simplify logic libdrm_amdgpu will handle
- * IBs only some pre-defined sizes.
- *
- * \sa amdgpu_cs_alloc_ib()
- */
-enum amdgpu_cs_ib_size {
- amdgpu_cs_ib_size_4K = 0,
- amdgpu_cs_ib_size_16K = 1,
- amdgpu_cs_ib_size_32K = 2,
- amdgpu_cs_ib_size_64K = 3,
- amdgpu_cs_ib_size_128K = 4
-};
-
-/** The number of different IB sizes */
-#define AMDGPU_CS_IB_SIZE_NUM 5
-
/*--------------------------------------------------------------------------*/
/* -------------------------- Datatypes ----------------------------------- */
@@ -133,13 +113,6 @@ typedef struct amdgpu_bo *amdgpu_bo_handle;
*/
typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
-/**
- * Define handle to be used when dealing with command
- * buffers (a.k.a. ibs)
- *
- */
-typedef struct amdgpu_ib *amdgpu_ib_handle;
-
/*--------------------------------------------------------------------------*/
/* -------------------------- Structures ---------------------------------- */
@@ -255,7 +228,7 @@ struct amdgpu_bo_info {
*/
struct amdgpu_bo_import_result {
/** Handle of memory/buffer to use */
- amdgpu_bo_handle buf_handle;
+ amdgpu_bo_handle buf_handle;
/** Buffer size */
uint64_t alloc_size;
@@ -264,7 +237,6 @@ struct amdgpu_bo_import_result {
uint64_t virtual_mc_base_address;
};
-
/**
*
* Structure to describe GDS partitioning information.
@@ -274,52 +246,36 @@ struct amdgpu_bo_import_result {
*
*/
struct amdgpu_gds_resource_info {
- uint32_t gds_gfx_partition_size;
- uint32_t compute_partition_size;
- uint32_t gds_total_size;
- uint32_t gws_per_gfx_partition;
- uint32_t gws_per_compute_partition;
- uint32_t oa_per_gfx_partition;
- uint32_t oa_per_compute_partition;
+ uint32_t gds_gfx_partition_size;
+ uint32_t compute_partition_size;
+ uint32_t gds_total_size;
+ uint32_t gws_per_gfx_partition;
+ uint32_t gws_per_compute_partition;
+ uint32_t oa_per_gfx_partition;
+ uint32_t oa_per_compute_partition;
};
-
-
/**
- * Structure describing result of request to allocate GDS
+ * Structure describing CS dependency
*
- * \sa amdgpu_gpu_resource_gds_alloc
+ * \sa amdgpu_cs_request, amdgpu_cs_submit()
*
*/
-struct amdgpu_gds_alloc_info {
- /** Handle assigned to gds allocation */
- amdgpu_bo_handle resource_handle;
+struct amdgpu_cs_dep_info {
+ /** Context to which the fence belongs */
+ amdgpu_context_handle context;
- /** How much was really allocated */
- uint32_t gds_memory_size;
-
- /** Number of GWS resources allocated */
- uint32_t gws;
-
- /** Number of OA resources allocated */
- uint32_t oa;
-};
+ /** To which HW IP type the fence belongs */
+ uint32_t ip_type;
-/**
- * Structure to described allocated command buffer (a.k.a. IB)
- *
- * \sa amdgpu_cs_alloc_ib()
- *
-*/
-struct amdgpu_cs_ib_alloc_result {
- /** IB allocation handle */
- amdgpu_ib_handle handle;
+ /** IP instance index if there are several IPs of the same type. */
+ uint32_t ip_instance;
- /** Assigned GPU VM MC Address of command buffer */
- uint64_t mc_address;
+ /** Ring index of the HW IP */
+ uint32_t ring;
- /** Address to be used for CPU access */
- void *cpu;
+ /** Specify fence for which we need to check submission status.*/
+ uint64_t fence;
};
/**
@@ -330,21 +286,17 @@ struct amdgpu_cs_ib_alloc_result {
*/
struct amdgpu_cs_ib_info {
/** Special flags */
- uint64_t flags;
+ uint64_t flags;
- /** Handle of command buffer */
- amdgpu_ib_handle ib_handle;
+ /** Virtual MC address of the command buffer */
+ uint64_t ib_mc_address;
/**
* Size of Command Buffer to be submitted.
* - The size is in units of dwords (4 bytes).
- * - Must be less or equal to the size of allocated IB
* - Could be 0
*/
- uint32_t size;
-
- /** Offset in the IB buffer object (in unit of dwords) */
- uint32_t offset_dw;
+ uint32_t size;
};
/**
@@ -356,25 +308,37 @@ struct amdgpu_cs_ib_info {
*/
struct amdgpu_cs_request {
/** Specify flags with additional information */
- uint64_t flags;
+ uint64_t flags;
/** Specify HW IP block type to which to send the IB. */
- unsigned ip_type;
+ unsigned ip_type;
/** IP instance index if there are several IPs of the same type. */
- unsigned ip_instance;
+ unsigned ip_instance;
/**
* Specify ring index of the IP. We could have several rings
* in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
*/
- uint32_t ring;
+ uint32_t ring;
/**
* List handle with resources used by this request.
*/
amdgpu_bo_list_handle resources;
+ /**
+ * Number of dependencies this Command submission needs to
+ * wait for before starting execution.
+ */
+ uint32_t number_of_dependencies;
+
+ /**
+ * Array of dependencies which need to be met before
+ * execution can start.
+ */
+ struct amdgpu_cs_dep_info *dependencies;
+
/** Number of IBs to submit in the field ibs. */
uint32_t number_of_ibs;
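[Editor's note] The request now carries its dependencies directly, and each IB is described only by its GPU virtual address and size. A hedged sketch of one submission that waits on a fence from another context; the helper name and parameters are illustrative, and the amdgpu_cs_submit() prototype (context, flags, request array, request count, out fence array) is assumed from the declaration later in this header, which the hunk below only shows in part:

#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Sketch: submit one GFX IB that must wait for `dep_fence`, a fence value
 * previously returned for `dep_ctx`. `ib` is assumed to describe a GTT
 * buffer already filled with `ib_size_dw` dwords of packets. */
static int submit_after(amdgpu_device_handle dev,
                        amdgpu_context_handle ctx,
                        amdgpu_context_handle dep_ctx, uint64_t dep_fence,
                        struct amdgpu_bo_alloc_result *ib, uint32_t ib_size_dw,
                        uint64_t *out_fence)
{
        struct amdgpu_cs_ib_info ib_info = {0};
        struct amdgpu_cs_dep_info dep = {0};
        struct amdgpu_cs_request request = {0};
        amdgpu_bo_list_handle bo_list;
        int r;

        /* The resource list must contain every BO the submission touches;
         * here that is only the IB itself. */
        r = amdgpu_bo_list_create(dev, 1, &ib->buf_handle, NULL, &bo_list);
        if (r)
                return r;

        ib_info.ib_mc_address = ib->virtual_mc_base_address;
        ib_info.size = ib_size_dw;

        dep.context = dep_ctx;
        dep.ip_type = AMDGPU_HW_IP_GFX;
        dep.fence = dep_fence;

        request.ip_type = AMDGPU_HW_IP_GFX;
        request.resources = bo_list;
        request.number_of_dependencies = 1;
        request.dependencies = &dep;
        request.number_of_ibs = 1;
        request.ibs = &ib_info;

        r = amdgpu_cs_submit(ctx, 0, &request, 1, out_fence);

        amdgpu_bo_list_destroy(bo_list);
        return r;
}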
@@ -393,26 +357,25 @@ struct amdgpu_cs_request {
struct amdgpu_cs_query_fence {
/** In which context IB was sent to execution */
- amdgpu_context_handle context;
+ amdgpu_context_handle context;
/** Timeout in nanoseconds. */
- uint64_t timeout_ns;
+ uint64_t timeout_ns;
/** To which HW IP type the fence belongs */
- unsigned ip_type;
+ unsigned ip_type;
/** IP instance index if there are several IPs of the same type. */
unsigned ip_instance;
/** Ring index of the HW IP */
- uint32_t ring;
+ uint32_t ring;
/** Flags */
- uint64_t flags;
+ uint64_t flags;
- /** Specify fence for which we need to check
- * submission status.*/
- uint64_t fence;
+ /** Specify fence for which we need to check submission status.*/
+ uint64_t fence;
};
/**
@@ -432,7 +395,6 @@ struct amdgpu_buffer_size_alignments {
uint64_t size_remote;
};
-
/**
* Structure which provide information about heap
*
@@ -441,7 +403,7 @@ struct amdgpu_buffer_size_alignments {
*/
struct amdgpu_heap_info {
/** Theoretical max. available memory in the given heap */
- uint64_t heap_size;
+ uint64_t heap_size;
/**
* Number of bytes allocated in the heap. This includes all processes
@@ -449,17 +411,15 @@ struct amdgpu_heap_info {
* are allocated, freed, and moved. It cannot be larger than
* heap_size.
*/
- uint64_t heap_usage;
+ uint64_t heap_usage;
/**
* Theoretical possible max. size of buffer which
* could be allocated in the given heap
*/
- uint64_t max_allocation;
+ uint64_t max_allocation;
};
-
-
/**
* Describe GPU h/w info needed for UMD correct initialization
*
@@ -468,7 +428,7 @@ struct amdgpu_heap_info {
struct amdgpu_gpu_info {
/** Asic id */
uint32_t asic_id;
- /**< Chip revision */
+ /** Chip revision */
uint32_t chip_rev;
/** Chip external revision */
uint32_t chip_external_rev;
@@ -478,6 +438,8 @@ struct amdgpu_gpu_info {
uint64_t ids_flags;
/** max engine clock*/
uint64_t max_engine_clk;
+ /** max memory clock */
+ uint64_t max_memory_clk;
/** number of shader engines */
uint32_t num_shader_engines;
/** number of shader arrays per engine */
@@ -514,6 +476,12 @@ struct amdgpu_gpu_info {
uint32_t cu_active_number;
uint32_t cu_ao_mask;
uint32_t cu_bitmap[4][4];
+ /* video memory type info*/
+ uint32_t vram_type;
+ /* video memory bit width*/
+ uint32_t vram_bit_width;
+ /** constant engine ram size*/
+ uint32_t ce_ram_size;
};
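[Editor's note] The new max_memory_clk, vram_type, vram_bit_width and ce_ram_size fields are filled in by amdgpu_query_gpu_info() (see the amdgpu_gpu_info.c hunk further down). A short sketch of reading them; units are whatever the kernel reports:

#include <stdio.h>
#include <amdgpu.h>

/* Sketch: print the newly exposed memory properties of a device. */
static int print_memory_info(amdgpu_device_handle dev)
{
        struct amdgpu_gpu_info info = {0};
        int r;

        r = amdgpu_query_gpu_info(dev, &info);
        if (r)
                return r;

        printf("max memory clock: %llu\n",
               (unsigned long long)info.max_memory_clk);
        printf("VRAM type %u, %u-bit bus, CE RAM %u bytes\n",
               info.vram_type, info.vram_bit_width, info.ce_ram_size);
        return 0;
}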
@@ -526,14 +494,14 @@ struct amdgpu_gpu_info {
*
*/
-
/**
*
* \param fd - \c [in] File descriptor for AMD GPU device
* received previously as the result of
* e.g. drmOpen() call.
- * For legacy fd type, the DRI2/DRI3 authentication
- * should be done before calling this function.
+ * For legacy fd type, the DRI2/DRI3
+ * authentication should be done before
+ * calling this function.
* \param major_version - \c [out] Major version of library. It is assumed
* that adding new functionality will cause
* increase in major version
@@ -544,7 +512,6 @@ struct amdgpu_gpu_info {
*
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*
@@ -555,8 +522,6 @@ int amdgpu_device_initialize(int fd,
uint32_t *minor_version,
amdgpu_device_handle *device_handle);
-
-
/**
*
* When access to such library does not needed any more the special
@@ -569,7 +534,6 @@ int amdgpu_device_initialize(int fd,
* result e.g. of drmOpen() call.
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_device_initialize()
@@ -577,7 +541,6 @@ int amdgpu_device_initialize(int fd,
*/
int amdgpu_device_deinitialize(amdgpu_device_handle device_handle);
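[Editor's note] A minimal initialization sketch for the two calls above; the render-node path is an assumption, real code would enumerate the nodes under /dev/dri:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <amdgpu.h>

/* Sketch: open a render node (no DRI2/DRI3 authentication needed),
 * initialize the library, then tear everything down again. */
int main(void)
{
        uint32_t major, minor;
        amdgpu_device_handle dev;
        int fd, r;

        fd = open("/dev/dri/renderD128", O_RDWR);  /* path is illustrative */
        if (fd < 0)
                return 1;

        r = amdgpu_device_initialize(fd, &major, &minor, &dev);
        if (r) {
                close(fd);
                return 1;
        }
        printf("libdrm_amdgpu %u.%u\n", major, minor);

        amdgpu_device_deinitialize(dev);
        close(fd);
        return 0;
}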
-
/*
* Memory Management
*
@@ -594,7 +557,6 @@ int amdgpu_device_deinitialize(amdgpu_device_handle device_handle);
* information about allocated memory
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_free()
@@ -611,7 +573,6 @@ int amdgpu_bo_alloc(amdgpu_device_handle dev,
* \param info - \c [in] Metadata to associated with buffer
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*/
int amdgpu_bo_set_metadata(amdgpu_bo_handle buf_handle,
@@ -627,7 +588,6 @@ int amdgpu_bo_set_metadata(amdgpu_bo_handle buf_handle,
* \param info - \c [out] Structure describing buffer
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc()
@@ -645,7 +605,6 @@ int amdgpu_bo_query_info(amdgpu_bo_handle buf_handle,
* \param shared_handle - \c [out] Special "shared" handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_import()
@@ -667,7 +626,6 @@ int amdgpu_bo_export(amdgpu_bo_handle buf_handle,
* about imported buffer
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \note Buffer must be "imported" only using new "fd" (different from
@@ -682,13 +640,47 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
struct amdgpu_bo_import_result *output);
/**
+ * Request GPU access to user allocated memory e.g. via "malloc"
+ *
+ * \param dev - [in] Device handle. See #amdgpu_device_initialize()
+ * \param cpu - [in] CPU address of user allocated memory which we
+ * want to map to GPU address space (make GPU accessible)
+ * (This address must be correctly aligned).
+ * \param size - [in] Size of allocation (must be correctly aligned)
+ * \param amdgpu_bo_alloc_result - [out] Handle of allocation to be passed as
+ * resource on submission and be used in other operations.
+ *
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code
+ *
+ * \note
+ * This call doesn't guarantee that such memory will be persistently
+ * "locked" / make non-pageable. The purpose of this call is to provide
+ * opportunity for GPU get access to this resource during submission.
+ *
+ * The maximum amount of memory which could be mapped in this call depends
+ * if overcommit is disabled or not. If overcommit is disabled than the max.
+ * amount of memory to be pinned will be limited by left "free" size in total
+ * amount of memory which could be locked simultaneously ("GART" size).
+ *
+ * Supported (theoretical) max. size of mapping is restricted only by
+ * "GART" size.
+ *
+ * It is responsibility of caller to correctly specify access rights
+ * on VA assignment.
+*/
+int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
+ void *cpu, uint64_t size,
+ struct amdgpu_bo_alloc_result *info);
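[Editor's note] A hedged usage sketch for the relocated userptr entry point; page alignment and the 1 MiB size are illustrative choices:

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <amdgpu.h>

/* Sketch: make 1 MiB of ordinary heap memory visible to the GPU.
 * Page alignment of address and size is assumed to satisfy the
 * "correctly aligned" requirement; the memory must stay allocated for
 * as long as the GPU may access it. */
static int map_user_memory(amdgpu_device_handle dev, void **cpu_out,
                           struct amdgpu_bo_alloc_result *result)
{
        size_t page = (size_t)sysconf(_SC_PAGESIZE);
        size_t size = 1024 * 1024;
        int r;

        if (posix_memalign(cpu_out, page, size))
                return -ENOMEM;

        r = amdgpu_create_bo_from_user_mem(dev, *cpu_out, size, result);
        if (r) {
                free(*cpu_out);
                *cpu_out = NULL;
                return r;
        }
        /* result->buf_handle can now go into a BO list for submission;
         * the mapping is not guaranteed to stay pinned between submissions. */
        return 0;
}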
+
+/**
* Free previosuly allocated memory
*
* \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
* \param buf_handle - \c [in] Buffer handle to free
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \note In the case of memory shared between different applications all
@@ -709,7 +701,6 @@ int amdgpu_bo_free(amdgpu_bo_handle buf_handle);
* \param cpu - \c [out] CPU address to be used for access
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_cpu_unmap()
@@ -723,7 +714,6 @@ int amdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void **cpu);
* \param buf_handle - \c [in] Buffer handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_cpu_map()
@@ -731,7 +721,6 @@ int amdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void **cpu);
*/
int amdgpu_bo_cpu_unmap(amdgpu_bo_handle buf_handle);
-
/**
* Wait until a buffer is not used by the device.
*
@@ -743,7 +732,7 @@ int amdgpu_bo_cpu_unmap(amdgpu_bo_handle buf_handle);
* 1 GPU access is in fly or scheduled
*
* \return 0 - on success
- * <0 - AMD specific error code
+ * <0 - Negative POSIX Error code
*/
int amdgpu_bo_wait_for_idle(amdgpu_bo_handle buf_handle,
uint64_t timeout_ns,
@@ -760,7 +749,6 @@ int amdgpu_bo_wait_for_idle(amdgpu_bo_handle buf_handle,
* \param result - \c [out] Created BO list handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_list_destroy()
@@ -777,7 +765,6 @@ int amdgpu_bo_list_create(amdgpu_device_handle dev,
* \param handle - \c [in] BO list handle.
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_list_create()
@@ -793,7 +780,6 @@ int amdgpu_bo_list_destroy(amdgpu_bo_list_handle handle);
* \param resource_prios - \c [in] Optional priority for each handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_bo_list_update()
@@ -804,67 +790,6 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle,
uint8_t *resource_prios);
/*
- * Special GPU Resources
- *
-*/
-
-
-
-/**
- * Query information about GDS
- *
- * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
- * \param gds_info - \c [out] Pointer to structure to get GDS information
- *
- * \return 0 on success\n
- * >0 - AMD specific error code\n
- * <0 - Negative POSIX Error code
- *
-*/
-int amdgpu_gpu_resource_query_gds_info(amdgpu_device_handle dev,
- struct amdgpu_gds_resource_info *
- gds_info);
-
-
-/**
- * Allocate GDS partitions
- *
- * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
- * \param gds_size - \c [in] Size of gds allocation. Must be aligned
- * accordingly.
- * \param alloc_info - \c [out] Pointer to structure to receive information
- * about allocation
- *
- * \return 0 on success\n
- * >0 - AMD specific error code\n
- * <0 - Negative POSIX Error code
- *
- *
-*/
-int amdgpu_gpu_resource_gds_alloc(amdgpu_device_handle dev,
- uint32_t gds_size,
- struct amdgpu_gds_alloc_info *alloc_info);
-
-
-
-
-/**
- * Release GDS resource. When GDS and associated resources not needed any
- * more UMD should free them
- *
- * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
- * \param handle - \c [in] Handle assigned to GDS allocation
- *
- * \return 0 on success\n
- * >0 - AMD specific error code\n
- * <0 - Negative POSIX Error code
- *
-*/
-int amdgpu_gpu_resource_gds_free(amdgpu_bo_handle handle);
-
-
-
-/*
* GPU Execution context
*
*/
@@ -883,7 +808,6 @@ int amdgpu_gpu_resource_gds_free(amdgpu_bo_handle handle);
* \param context - \c [out] GPU Context handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_cs_ctx_free()
@@ -899,7 +823,6 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
* \param context - \c [in] GPU Context handle
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_cs_ctx_create()
@@ -915,7 +838,6 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
* \param hangs - \c [out] Number of hangs caused by the context.
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \sa amdgpu_cs_ctx_create()
@@ -924,51 +846,11 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
uint32_t *state, uint32_t *hangs);
-
/*
* Command Buffers Management
*
*/
-
-/**
- * Allocate memory to be filled with PM4 packets and be served as the first
- * entry point of execution (a.k.a. Indirect Buffer)
- *
- * \param context - \c [in] GPU Context which will use IB
- * \param ib_size - \c [in] Size of allocation
- * \param output - \c [out] Pointer to structure to get information about
- * allocated IB
- *
- * \return 0 on success\n
- * >0 - AMD specific error code\n
- * <0 - Negative POSIX Error code
- *
- * \sa amdgpu_cs_free_ib()
- *
-*/
-int amdgpu_cs_alloc_ib(amdgpu_context_handle context,
- enum amdgpu_cs_ib_size ib_size,
- struct amdgpu_cs_ib_alloc_result *output);
-
-/**
- * If UMD has allocates IBs which doesn’t need any more than those IBs must
- * be explicitly freed
- *
- * \param handle - \c [in] IB handle
- *
- * \return 0 on success\n
- * >0 - AMD specific error code\n
- * <0 - Negative POSIX Error code
- *
- * \note Libdrm_amdgpu will guarantee that it will correctly detect when it
- * is safe to return IB to free pool
- *
- * \sa amdgpu_cs_alloc_ib()
- *
-*/
-int amdgpu_cs_free_ib(amdgpu_ib_handle handle);
-
/**
* Send request to submit command buffers to hardware.
*
@@ -996,17 +878,8 @@ int amdgpu_cs_free_ib(amdgpu_ib_handle handle);
* submission request
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
- * \note It is assumed that by default IB will be returned to free pool
- * automatically by libdrm_amdgpu when submission will completed.
- * It is possible for UMD to make decision to re-use the same IB in
- * this case it should be explicitly freed.\n
- * Accordingly, by default, after submission UMD should not touch passed
- * IBs. If UMD needs to re-use IB then the special flag AMDGPU_CS_REUSE_IB
- * must be passed.
- *
* \note It is required to pass correct resource list with buffer handles
* which will be accessible by command buffers from submission
* This will allow kernel driver to correctly implement "paging".
@@ -1032,7 +905,6 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
* !0 - otherwise
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
* \note If UMD wants only to check operation status and returned immediately
@@ -1045,13 +917,11 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
uint32_t *expired);
-
/*
* Query / Info API
*
*/
-
/**
* Query allocation size alignments
*
@@ -1064,15 +934,12 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
* requirements
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
- struct amdgpu_buffer_size_alignments
- *info);
-
-
+ struct amdgpu_buffer_size_alignments
+ *info);
/**
* Query firmware versions
@@ -1085,7 +952,6 @@ int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
* \param feature - \c [out] Pointer to to the "feature" return value
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*/
@@ -1093,8 +959,6 @@ int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type,
unsigned ip_instance, unsigned index,
uint32_t *version, uint32_t *feature);
-
-
/**
* Query the number of HW IP instances of a certain type.
*
@@ -1103,14 +967,11 @@ int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type,
* \param count - \c [out] Pointer to structure to get information
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*/
int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type,
uint32_t *count);
-
-
/**
* Query engine information
*
@@ -1123,16 +984,12 @@ int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type,
* \param info - \c [out] Pointer to structure to get information
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*/
int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
unsigned ip_instance,
struct drm_amdgpu_info_hw_ip *info);
-
-
-
/**
* Query heap information
*
@@ -1144,16 +1001,11 @@ int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
* \param info - \c [in] Pointer to structure to get needed information
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*/
-int amdgpu_query_heap_info(amdgpu_device_handle dev,
- uint32_t heap,
- uint32_t flags,
- struct amdgpu_heap_info *info);
-
-
+int amdgpu_query_heap_info(amdgpu_device_handle dev, uint32_t heap,
+ uint32_t flags, struct amdgpu_heap_info *info);
/**
* Get the CRTC ID from the mode object ID
@@ -1163,15 +1015,12 @@ int amdgpu_query_heap_info(amdgpu_device_handle dev,
* \param result - \c [in] Pointer to the CRTC ID
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id,
int32_t *result);
-
-
/**
* Query GPU H/w Info
*
@@ -1182,15 +1031,12 @@ int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id,
* \param info - \c [in] Pointer to structure to get needed information
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_query_gpu_info(amdgpu_device_handle dev,
struct amdgpu_gpu_info *info);
-
-
/**
* Query hardware or driver information.
*
@@ -1203,14 +1049,24 @@ int amdgpu_query_gpu_info(amdgpu_device_handle dev,
* \param value - \c [out] Pointer to the return value.
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX error code
*
*/
int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
unsigned size, void *value);
-
+/**
+ * Query information about GDS
+ *
+ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
+ * \param gds_info - \c [out] Pointer to structure to get GDS information
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code
+ *
+*/
+int amdgpu_query_gds_info(amdgpu_device_handle dev,
+ struct amdgpu_gds_resource_info *gds_info);
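[Editor's note] A short usage sketch for the new query, which replaces the removed GDS allocation API (the implementation lands in amdgpu_gpu_info.c below):

#include <stdio.h>
#include <amdgpu.h>

/* Sketch: report the GDS partitioning the kernel exposes. */
static int print_gds_info(amdgpu_device_handle dev)
{
        struct amdgpu_gds_resource_info gds = {0};
        int r;

        r = amdgpu_query_gds_info(dev, &gds);
        if (r)
                return r;

        printf("GDS total %u, gfx partition %u, compute partition %u\n",
               gds.gds_total_size, gds.gds_gfx_partition_size,
               gds.compute_partition_size);
        return 0;
}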
/**
* Read a set of consecutive memory-mapped registers.
@@ -1226,7 +1082,6 @@ int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
* \param values - \c [out] The pointer to return values.
*
* \return 0 on success\n
- * >0 - AMD specific error code\n
* <0 - Negative POSIX error code
*
*/
@@ -1234,46 +1089,4 @@ int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset,
unsigned count, uint32_t instance, uint32_t flags,
uint32_t *values);
-
-
-/**
- * Request GPU access to user allocated memory e.g. via "malloc"
- *
- * \param dev - [in] Device handle. See #amdgpu_device_initialize()
- * \param cpu - [in] CPU address of user allocated memory which we
- * want to map to GPU address space (make GPU accessible)
- * (This address must be correctly aligned).
- * \param size - [in] Size of allocation (must be correctly aligned)
- * \param amdgpu_bo_alloc_result - [out] Handle of allocation to be passed as resource
- * on submission and be used in other operations.(e.g. for VA submission)
- * ( Temporally defined amdgpu_bo_alloc_result as parameter for return mc address. )
- *
- *
- * \return 0 on success
- * >0 - AMD specific error code
- * <0 - Negative POSIX Error code
- *
- *
- * \note
- * This call doesn't guarantee that such memory will be persistently
- * "locked" / make non-pageable. The purpose of this call is to provide
- * opportunity for GPU get access to this resource during submission.
- *
- * The maximum amount of memory which could be mapped in this call depends
- * if overcommit is disabled or not. If overcommit is disabled than the max.
- * amount of memory to be pinned will be limited by left "free" size in total
- * amount of memory which could be locked simultaneously ("GART" size).
- *
- * Supported (theoretical) max. size of mapping is restricted only by
- * "GART" size.
- *
- * It is responsibility of caller to correctly specify access rights
- * on VA assignment.
-*/
-int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
- void *cpu,
- uint64_t size,
- struct amdgpu_bo_alloc_result *info);
-
-
#endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index b34abb85..fae1003b 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -27,6 +27,7 @@
#include <stdlib.h>
#include <stdio.h>
+#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
@@ -51,35 +52,11 @@ static void amdgpu_close_kms_handle(amdgpu_device_handle dev,
drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
}
-void amdgpu_bo_free_internal(amdgpu_bo_handle bo)
-{
- /* Remove the buffer from the hash tables. */
- pthread_mutex_lock(&bo->dev->bo_table_mutex);
- util_hash_table_remove(bo->dev->bo_handles,
- (void*)(uintptr_t)bo->handle);
- if (bo->flink_name) {
- util_hash_table_remove(bo->dev->bo_flink_names,
- (void*)(uintptr_t)bo->flink_name);
- }
- pthread_mutex_unlock(&bo->dev->bo_table_mutex);
-
- /* Release CPU access. */
- if (bo->cpu_map_count > 0) {
- bo->cpu_map_count = 1;
- amdgpu_bo_cpu_unmap(bo);
- }
-
- amdgpu_close_kms_handle(bo->dev, bo->handle);
- pthread_mutex_destroy(&bo->cpu_access_mutex);
- amdgpu_vamgr_free_va(bo->dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size);
- free(bo);
-}
-
/* map the buffer to the GPU virtual address space */
static int amdgpu_bo_map(amdgpu_bo_handle bo, uint32_t alignment)
{
amdgpu_device_handle dev = bo->dev;
- union drm_amdgpu_gem_va va;
+ struct drm_amdgpu_gem_va va;
int r;
memset(&va, 0, sizeof(va));
@@ -90,17 +67,17 @@ static int amdgpu_bo_map(amdgpu_bo_handle bo, uint32_t alignment)
if (bo->virtual_mc_base_address == AMDGPU_INVALID_VA_ADDRESS)
return -ENOSPC;
- va.in.handle = bo->handle;
- va.in.operation = AMDGPU_VA_OP_MAP;
- va.in.flags = AMDGPU_VM_PAGE_READABLE |
+ va.handle = bo->handle;
+ va.operation = AMDGPU_VA_OP_MAP;
+ va.flags = AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE;
- va.in.va_address = bo->virtual_mc_base_address;
- va.in.offset_in_bo = 0;
- va.in.map_size = ALIGN(bo->alloc_size, getpagesize());
+ va.va_address = bo->virtual_mc_base_address;
+ va.offset_in_bo = 0;
+ va.map_size = ALIGN(bo->alloc_size, getpagesize());
r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
- if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) {
+ if (r) {
amdgpu_bo_free_internal(bo);
return r;
}
@@ -108,6 +85,63 @@ static int amdgpu_bo_map(amdgpu_bo_handle bo, uint32_t alignment)
return 0;
}
+/* unmap the buffer from the GPU virtual address space */
+static void amdgpu_bo_unmap(amdgpu_bo_handle bo)
+{
+ amdgpu_device_handle dev = bo->dev;
+ struct drm_amdgpu_gem_va va;
+ int r;
+
+ if (bo->virtual_mc_base_address == AMDGPU_INVALID_VA_ADDRESS)
+ return;
+
+ memset(&va, 0, sizeof(va));
+
+ va.handle = bo->handle;
+ va.operation = AMDGPU_VA_OP_UNMAP;
+ va.flags = AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE;
+ va.va_address = bo->virtual_mc_base_address;
+ va.offset_in_bo = 0;
+ va.map_size = ALIGN(bo->alloc_size, getpagesize());
+
+ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
+ if (r) {
+ fprintf(stderr, "amdgpu: VA_OP_UNMAP failed with %d\n", r);
+ return;
+ }
+
+ amdgpu_vamgr_free_va(bo->dev->vamgr, bo->virtual_mc_base_address,
+ bo->alloc_size);
+
+ bo->virtual_mc_base_address = AMDGPU_INVALID_VA_ADDRESS;
+}
+
+void amdgpu_bo_free_internal(amdgpu_bo_handle bo)
+{
+ /* Remove the buffer from the hash tables. */
+ pthread_mutex_lock(&bo->dev->bo_table_mutex);
+ util_hash_table_remove(bo->dev->bo_handles,
+ (void*)(uintptr_t)bo->handle);
+ if (bo->flink_name) {
+ util_hash_table_remove(bo->dev->bo_flink_names,
+ (void*)(uintptr_t)bo->flink_name);
+ }
+ pthread_mutex_unlock(&bo->dev->bo_table_mutex);
+
+ /* Release CPU access. */
+ if (bo->cpu_map_count > 0) {
+ bo->cpu_map_count = 1;
+ amdgpu_bo_cpu_unmap(bo);
+ }
+
+ amdgpu_bo_unmap(bo);
+ amdgpu_close_kms_handle(bo->dev, bo->handle);
+ pthread_mutex_destroy(&bo->cpu_access_mutex);
+ free(bo);
+}
+
int amdgpu_bo_alloc(amdgpu_device_handle dev,
struct amdgpu_bo_alloc_request *alloc_buffer,
struct amdgpu_bo_alloc_result *info)
@@ -191,6 +225,10 @@ int amdgpu_bo_query_info(amdgpu_bo_handle bo,
struct drm_amdgpu_gem_op gem_op = {};
int r;
+ /* Validate the BO passed in */
+ if (!bo->handle)
+ return -EINVAL;
+
/* Query metadata. */
metadata.handle = bo->handle;
metadata.op = AMDGPU_GEM_METADATA_OP_GET_METADATA;
@@ -303,10 +341,6 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
return 0;
case amdgpu_bo_handle_type_kms:
- r = amdgpu_bo_export_flink(bo);
- if (r)
- return r;
-
amdgpu_add_handle_to_table(bo);
*shared_handle = bo->handle;
return 0;
@@ -449,10 +483,10 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
atomic_set(&bo->refcount, 1);
bo->dev = dev;
pthread_mutex_init(&bo->cpu_access_mutex, NULL);
- pthread_mutex_unlock(&dev->bo_table_mutex);
r = amdgpu_bo_map(bo, 1 << 20);
if (r) {
+ pthread_mutex_unlock(&dev->bo_table_mutex);
amdgpu_bo_reference(&bo, NULL);
return r;
}
@@ -637,9 +671,15 @@ int amdgpu_bo_list_create(amdgpu_device_handle dev,
unsigned i;
int r;
- list = calloc(number_of_resources, sizeof(struct drm_amdgpu_bo_list_entry));
+ if (!number_of_resources)
+ return -EINVAL;
- if (list == NULL)
+ /* overflow check for multiplication */
+ if (number_of_resources > UINT32_MAX / sizeof(struct drm_amdgpu_bo_list_entry))
+ return -EINVAL;
+
+ list = malloc(number_of_resources * sizeof(struct drm_amdgpu_bo_list_entry));
+ if (!list)
return -ENOMEM;
memset(&args, 0, sizeof(args));
@@ -658,15 +698,14 @@ int amdgpu_bo_list_create(amdgpu_device_handle dev,
r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST,
&args, sizeof(args));
+ free(list);
if (r)
- goto out;
+ return r;
- *result = calloc(1, sizeof(struct amdgpu_bo_list));
+ *result = malloc(sizeof(struct amdgpu_bo_list));
(*result)->dev = dev;
(*result)->handle = args.out.list_handle;
-out:
- free(list);
- return r;
+ return 0;
}
int amdgpu_bo_list_destroy(amdgpu_bo_list_handle list)
@@ -697,11 +736,17 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle,
unsigned i;
int r;
- list = calloc(number_of_resources, sizeof(struct drm_amdgpu_bo_list_entry));
+ if (!number_of_resources)
+ return -EINVAL;
+
+ /* overflow check for multiplication */
+ if (number_of_resources > UINT32_MAX / sizeof(struct drm_amdgpu_bo_list_entry))
+ return -EINVAL;
+
+ list = malloc(number_of_resources * sizeof(struct drm_amdgpu_bo_list_entry));
if (list == NULL)
return -ENOMEM;
- memset(&args, 0, sizeof(args));
args.in.operation = AMDGPU_BO_LIST_OP_UPDATE;
args.in.list_handle = handle->handle;
args.in.bo_number = number_of_resources;
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 44e132cc..b3f51700 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -33,480 +33,6 @@
#include "amdgpu_internal.h"
/**
- * Create an IB buffer.
- *
- * \param dev - \c [in] Device handle
- * \param context - \c [in] GPU Context
- * \param ib_size - \c [in] Size of allocation
- * \param ib - \c [out] return the pointer to the created IB buffer
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_create_ib(amdgpu_context_handle context,
- enum amdgpu_cs_ib_size ib_size,
- amdgpu_ib_handle *ib)
-{
- struct amdgpu_bo_alloc_request alloc_buffer;
- struct amdgpu_bo_alloc_result info;
- int r;
- void *cpu;
- struct amdgpu_ib *new_ib;
-
- memset(&alloc_buffer, 0, sizeof(alloc_buffer));
-
- switch (ib_size) {
- case amdgpu_cs_ib_size_4K:
- alloc_buffer.alloc_size = 4 * 1024;
- break;
- case amdgpu_cs_ib_size_16K:
- alloc_buffer.alloc_size = 16 * 1024;
- break;
- case amdgpu_cs_ib_size_32K:
- alloc_buffer.alloc_size = 32 * 1024;
- break;
- case amdgpu_cs_ib_size_64K:
- alloc_buffer.alloc_size = 64 * 1024;
- break;
- case amdgpu_cs_ib_size_128K:
- alloc_buffer.alloc_size = 128 * 1024;
- break;
- default:
- return -EINVAL;
- }
-
- alloc_buffer.phys_alignment = 4 * 1024;
-
- alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
-
- r = amdgpu_bo_alloc(context->dev,
- &alloc_buffer,
- &info);
- if (r)
- return r;
-
- r = amdgpu_bo_cpu_map(info.buf_handle, &cpu);
- if (r) {
- amdgpu_bo_free(info.buf_handle);
- return r;
- }
-
- new_ib = malloc(sizeof(struct amdgpu_ib));
- if (NULL == new_ib) {
- amdgpu_bo_cpu_unmap(info.buf_handle);
- amdgpu_bo_free(info.buf_handle);
- return -ENOMEM;
- }
-
- new_ib->context = context;
- new_ib->buf_handle = info.buf_handle;
- new_ib->cpu = cpu;
- new_ib->virtual_mc_base_address = info.virtual_mc_base_address;
- new_ib->ib_size = ib_size;
- *ib = new_ib;
- return 0;
-}
-
-/**
- * Destroy an IB buffer.
- *
- * \param dev - \c [in] Device handle
- * \param ib - \c [in] the IB buffer
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_destroy_ib(amdgpu_ib_handle ib)
-{
- int r;
-
- r = amdgpu_bo_cpu_unmap(ib->buf_handle);
- if (r)
- return r;
-
- r = amdgpu_bo_free(ib->buf_handle);
- if (r)
- return r;
-
- free(ib);
- return 0;
-}
-
-/**
- * Initialize IB pools to empty.
- *
- * \param context - \c [in] GPU Context
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_init_ib_pool(amdgpu_context_handle context)
-{
- int i;
- int r;
-
- r = pthread_mutex_init(&context->pool_mutex, NULL);
- if (r)
- return r;
-
- for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++)
- LIST_INITHEAD(&context->ib_pools[i]);
-
- return 0;
-}
-
-/**
- * Allocate an IB buffer from IB pools.
- *
- * \param dev - \c [in] Device handle
- * \param context - \c [in] GPU Context
- * \param ib_size - \c [in] Size of allocation
- * \param ib - \c [out] return the pointer to the allocated IB buffer
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_alloc_from_ib_pool(amdgpu_context_handle context,
- enum amdgpu_cs_ib_size ib_size,
- amdgpu_ib_handle *ib)
-{
- int r;
- struct list_head *head;
- head = &context->ib_pools[ib_size];
-
- r = -ENOMEM;
- pthread_mutex_lock(&context->pool_mutex);
- if (!LIST_IS_EMPTY(head)) {
- *ib = LIST_ENTRY(struct amdgpu_ib, head->next, list_node);
- LIST_DEL(&(*ib)->list_node);
- r = 0;
- }
- pthread_mutex_unlock(&context->pool_mutex);
-
- return r;
-}
-
-/**
- * Free an IB buffer to IB pools.
- *
- * \param context - \c [in] GPU Context
- * \param ib - \c [in] the IB buffer
- *
- * \return N/A
-*/
-static void amdgpu_cs_free_to_ib_pool(amdgpu_context_handle context,
- amdgpu_ib_handle ib)
-{
- struct list_head *head;
- head = &context->ib_pools[ib->ib_size];
- pthread_mutex_lock(&context->pool_mutex);
- LIST_ADD(&ib->list_node, head);
- pthread_mutex_unlock(&context->pool_mutex);
- return;
-}
-
-/**
- * Destroy all IB buffers in pools
- *
- * \param dev - \c [in] Device handle
- * \param context - \c [in] GPU Context
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_destroy_ib_pool(amdgpu_context_handle context)
-{
- struct list_head *head;
- struct amdgpu_ib *next;
- struct amdgpu_ib *storage;
- int i, r;
-
- r = 0;
- pthread_mutex_lock(&context->pool_mutex);
- for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) {
- head = &context->ib_pools[i];
- LIST_FOR_EACH_ENTRY_SAFE(next, storage, head, list_node) {
- r = amdgpu_cs_destroy_ib(next);
- if (r)
- break;
- }
- }
- pthread_mutex_unlock(&context->pool_mutex);
- pthread_mutex_destroy(&context->pool_mutex);
- return r;
-}
-
-/**
- * Initialize pending IB lists
- *
- * \param context - \c [in] GPU Context
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_init_pendings(amdgpu_context_handle context)
-{
- unsigned ip, inst;
- uint32_t ring;
- int r;
-
- r = pthread_mutex_init(&context->pendings_mutex, NULL);
- if (r)
- return r;
-
- for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
- for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
- for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
- LIST_INITHEAD(&context->pendings[ip][inst][ring]);
-
- LIST_INITHEAD(&context->freed);
- return 0;
-}
-
-/**
- * Free pending IBs
- *
- * \param dev - \c [in] Device handle
- * \param context - \c [in] GPU Context
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_destroy_pendings(amdgpu_context_handle context)
-{
- int ip, inst;
- uint32_t ring;
- int r;
- struct amdgpu_ib *next;
- struct amdgpu_ib *s;
- struct list_head *head;
-
- r = 0;
- pthread_mutex_lock(&context->pendings_mutex);
- for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
- for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
- for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) {
- head = &context->pendings[ip][inst][ring];
- LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
- r = amdgpu_cs_destroy_ib(next);
- if (r)
- break;
- }
- }
-
- head = &context->freed;
- LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
- r = amdgpu_cs_destroy_ib(next);
- if (r)
- break;
- }
-
- pthread_mutex_unlock(&context->pendings_mutex);
- pthread_mutex_destroy(&context->pendings_mutex);
- return r;
-}
-
-/**
- * Add IB to pending IB lists without holding sequence_mutex.
- *
- * \param context - \c [in] GPU Context
- * \param ib - \c [in] ib to added to pending lists
- * \param ip - \c [in] hw ip block
- * \param ip_instance - \c [in] instance of the hw ip block
- * \param ring - \c [in] Ring of hw ip
- *
- * \return N/A
-*/
-static void amdgpu_cs_add_pending(amdgpu_context_handle context,
- amdgpu_ib_handle ib,
- unsigned ip, unsigned ip_instance,
- uint32_t ring)
-{
- struct list_head *head;
- struct amdgpu_ib *next;
- struct amdgpu_ib *s;
-
- pthread_mutex_lock(&context->pendings_mutex);
- head = &context->pendings[ip][ip_instance][ring];
- LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node)
- if (next == ib) {
- pthread_mutex_unlock(&context->pendings_mutex);
- return;
- }
-
- LIST_ADDTAIL(&ib->list_node, head);
- pthread_mutex_unlock(&context->pendings_mutex);
- return;
-}
-
-/**
- * Garbage collector on a pending IB list without holding pendings_mutex.
- * This function by itself is not multithread safe.
- *
- * \param context - \c [in] GPU Context
- * \param ip - \c [in] hw ip block
- * \param ip_instance - \c [in] instance of the hw ip block
- * \param ring - \c [in] Ring of hw ip
- * \param expired_fence - \c [in] fence expired
- *
- * \return N/A
- * \note Hold pendings_mutex before calling this function.
-*/
-static void amdgpu_cs_pending_gc_not_safe(amdgpu_context_handle context,
- unsigned ip, unsigned ip_instance,
- uint32_t ring,
- uint64_t expired_fence)
-{
- struct list_head *head;
- struct amdgpu_ib *next;
- struct amdgpu_ib *s;
- int r;
-
- head = &context->pendings[ip][ip_instance][ring];
- LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node)
- if (next->cs_handle <= expired_fence) {
- LIST_DEL(&next->list_node);
- amdgpu_cs_free_to_ib_pool(context, next);
- } else {
- /* The pending list is a sorted list.
- There is no need to continue. */
- break;
- }
-
- /* walk the freed list as well */
- head = &context->freed;
- LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
- bool busy;
-
- r = amdgpu_bo_wait_for_idle(next->buf_handle, 0, &busy);
- if (r || busy)
- break;
-
- LIST_DEL(&next->list_node);
- amdgpu_cs_free_to_ib_pool(context, next);
- }
-
- return;
-}
-
-/**
- * Garbage collector on a pending IB list
- *
- * \param context - \c [in] GPU Context
- * \param ip - \c [in] hw ip block
- * \param ip_instance - \c [in] instance of the hw ip block
- * \param ring - \c [in] Ring of hw ip
- * \param expired_fence - \c [in] fence expired
- *
- * \return N/A
-*/
-static void amdgpu_cs_pending_gc(amdgpu_context_handle context,
- unsigned ip, unsigned ip_instance,
- uint32_t ring,
- uint64_t expired_fence)
-{
- pthread_mutex_lock(&context->pendings_mutex);
- amdgpu_cs_pending_gc_not_safe(context, ip, ip_instance, ring,
- expired_fence);
- pthread_mutex_unlock(&context->pendings_mutex);
- return;
-}
-
-/**
- * Garbage collector on all pending IB lists
- *
- * \param context - \c [in] GPU Context
- *
- * \return N/A
-*/
-static void amdgpu_cs_all_pending_gc(amdgpu_context_handle context)
-{
- unsigned ip, inst;
- uint32_t ring;
- uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
-
- pthread_mutex_lock(&context->sequence_mutex);
- for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
- for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
- for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
- expired_fences[ip][inst][ring] =
- context->expired_fences[ip][inst][ring];
- pthread_mutex_unlock(&context->sequence_mutex);
-
- pthread_mutex_lock(&context->pendings_mutex);
- for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
- for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
- for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
- amdgpu_cs_pending_gc_not_safe(context, ip, inst, ring,
- expired_fences[ip][inst][ring]);
- pthread_mutex_unlock(&context->pendings_mutex);
-}
-
-/**
- * Allocate an IB buffer
- * If there is no free IB buffer in pools, create one.
- *
- * \param dev - \c [in] Device handle
- * \param context - \c [in] GPU Context
- * \param ib_size - \c [in] Size of allocation
- * \param ib - \c [out] return the pointer to the allocated IB buffer
- *
- * \return 0 on success otherwise POSIX Error code
-*/
-static int amdgpu_cs_alloc_ib_local(amdgpu_context_handle context,
- enum amdgpu_cs_ib_size ib_size,
- amdgpu_ib_handle *ib)
-{
- int r;
-
- r = amdgpu_cs_alloc_from_ib_pool(context, ib_size, ib);
- if (!r)
- return r;
-
- amdgpu_cs_all_pending_gc(context);
-
- /* Retry to allocate from free IB pools after garbage collector. */
- r = amdgpu_cs_alloc_from_ib_pool(context, ib_size, ib);
- if (!r)
- return r;
-
- /* There is no suitable IB in free pools. Create one. */
- r = amdgpu_cs_create_ib(context, ib_size, ib);
- return r;
-}
-
-int amdgpu_cs_alloc_ib(amdgpu_context_handle context,
- enum amdgpu_cs_ib_size ib_size,
- struct amdgpu_cs_ib_alloc_result *output)
-{
- int r;
- amdgpu_ib_handle ib;
-
- if (NULL == context)
- return -EINVAL;
- if (NULL == output)
- return -EINVAL;
- if (ib_size >= AMDGPU_CS_IB_SIZE_NUM)
- return -EINVAL;
-
- r = amdgpu_cs_alloc_ib_local(context, ib_size, &ib);
- if (!r) {
- output->handle = ib;
- output->cpu = ib->cpu;
- output->mc_address = ib->virtual_mc_base_address;
- }
-
- return r;
-}
-
-int amdgpu_cs_free_ib(amdgpu_ib_handle handle)
-{
- amdgpu_context_handle context;
-
- if (NULL == handle)
- return -EINVAL;
-
- context = handle->context;
- pthread_mutex_lock(&context->pendings_mutex);
- LIST_ADD(&handle->list_node, &context->freed);
- pthread_mutex_unlock(&context->pendings_mutex);
- return 0;
-}
-
-/**
* Create command submission context
*
* \param dev - \c [in] amdgpu device handle
@@ -517,6 +43,8 @@ int amdgpu_cs_free_ib(amdgpu_ib_handle handle)
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
amdgpu_context_handle *context)
{
+ struct amdgpu_bo_alloc_request alloc_buffer = {};
+ struct amdgpu_bo_alloc_result info = {};
struct amdgpu_context *gpu_context;
union drm_amdgpu_ctx args;
int r;
@@ -536,20 +64,21 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
if (r)
goto error_mutex;
- r = amdgpu_cs_init_ib_pool(gpu_context);
- if (r)
- goto error_pool;
+ /* Create the fence BO */
+ alloc_buffer.alloc_size = 4 * 1024;
+ alloc_buffer.phys_alignment = 4 * 1024;
+ alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
- r = amdgpu_cs_init_pendings(gpu_context);
+ r = amdgpu_bo_alloc(dev, &alloc_buffer, &info);
if (r)
- goto error_pendings;
+ goto error_fence_alloc;
+ gpu_context->fence_bo = info.buf_handle;
- r = amdgpu_cs_alloc_ib_local(gpu_context, amdgpu_cs_ib_size_4K,
- &gpu_context->fence_ib);
+ r = amdgpu_bo_cpu_map(gpu_context->fence_bo, &gpu_context->fence_cpu);
if (r)
- goto error_fence_ib;
-
+ goto error_fence_map;
+ /* Create the context */
memset(&args, 0, sizeof(args));
args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
@@ -562,15 +91,12 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
return 0;
error_kernel:
- amdgpu_cs_free_ib(gpu_context->fence_ib);
-
-error_fence_ib:
- amdgpu_cs_destroy_pendings(gpu_context);
+ amdgpu_bo_cpu_unmap(gpu_context->fence_bo);
-error_pendings:
- amdgpu_cs_destroy_ib_pool(gpu_context);
+error_fence_map:
+ amdgpu_bo_free(gpu_context->fence_bo);
-error_pool:
+error_fence_alloc:
pthread_mutex_destroy(&gpu_context->sequence_mutex);
error_mutex:
@@ -594,15 +120,11 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
if (NULL == context)
return -EINVAL;
- r = amdgpu_cs_free_ib(context->fence_ib);
- if (r)
- return r;
-
- r = amdgpu_cs_destroy_pendings(context);
+ r = amdgpu_bo_cpu_unmap(context->fence_bo);
if (r)
return r;
- r = amdgpu_cs_destroy_ib_pool(context);
+ r = amdgpu_bo_free(context->fence_bo);
if (r)
return r;
@@ -660,12 +182,13 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
struct amdgpu_cs_request *ibs_request,
uint64_t *fence)
{
- int r;
- uint32_t i, size;
union drm_amdgpu_cs cs;
uint64_t *chunk_array;
struct drm_amdgpu_cs_chunk *chunks;
struct drm_amdgpu_cs_chunk_data *chunk_data;
+ struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
+ uint32_t i, size;
+ int r = 0;
if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
return -EINVAL;
@@ -674,17 +197,13 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
return -EINVAL;
- size = (ibs_request->number_of_ibs + 1) * (
- sizeof(uint64_t) +
- sizeof(struct drm_amdgpu_cs_chunk) +
- sizeof(struct drm_amdgpu_cs_chunk_data));
- chunk_array = malloc(size);
- if (NULL == chunk_array)
- return -ENOMEM;
- memset(chunk_array, 0, size);
+ size = ibs_request->number_of_ibs + 2;
+
+ chunk_array = alloca(sizeof(uint64_t) * size);
+ chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
- chunks = (struct drm_amdgpu_cs_chunk *)(chunk_array + ibs_request->number_of_ibs + 1);
- chunk_data = (struct drm_amdgpu_cs_chunk_data *)(chunks + ibs_request->number_of_ibs + 1);
+ size = ibs_request->number_of_ibs + 1;
+ chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
memset(&cs, 0, sizeof(cs));
cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
@@ -702,9 +221,8 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
ib = &ibs_request->ibs[i];
- chunk_data[i].ib_data.handle = ib->ib_handle->buf_handle->handle;
- chunk_data[i].ib_data.va_start = ib->ib_handle->virtual_mc_base_address
- + ib->offset_dw * 4;
+ chunk_data[i].ib_data._pad = 0;
+ chunk_data[i].ib_data.va_start = ib->ib_mc_address;
chunk_data[i].ib_data.ib_bytes = ib->size * 4;
chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
@@ -725,46 +243,51 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
/* fence bo handle */
- chunk_data[i].fence_data.handle = context->fence_ib->buf_handle->handle;
+ chunk_data[i].fence_data.handle = context->fence_bo->handle;
/* offset */
chunk_data[i].fence_data.offset = amdgpu_cs_fence_index(
ibs_request->ip_type, ibs_request->ring);
chunk_data[i].fence_data.offset *= sizeof(uint64_t);
}
- r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
- &cs, sizeof(cs));
- if (r)
- goto error_unlock;
-
-
- /* Hold sequence_mutex while adding record to the pending list.
- So the pending list is a sorted list according to fence value. */
-
- for (i = 0; i < ibs_request->number_of_ibs; i++) {
- struct amdgpu_cs_ib_info *ib;
+ if (ibs_request->number_of_dependencies) {
+ dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
+ ibs_request->number_of_dependencies);
+ if (!dependencies) {
+ r = -ENOMEM;
+ goto error_unlock;
+ }
- ib = &ibs_request->ibs[i];
- if (ib->flags & AMDGPU_CS_REUSE_IB)
- continue;
+ for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
+ struct amdgpu_cs_dep_info *info = &ibs_request->dependencies[i];
+ struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
+ dep->ip_type = info->ip_type;
+ dep->ip_instance = info->ip_instance;
+ dep->ring = info->ring;
+ dep->ctx_id = info->context->id;
+ dep->handle = info->fence;
+ }
- ib->ib_handle->cs_handle = cs.out.handle;
+ i = cs.in.num_chunks++;
- amdgpu_cs_add_pending(context, ib->ib_handle, ibs_request->ip_type,
- ibs_request->ip_instance,
- ibs_request->ring);
+ /* dependencies chunk */
+ chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
+ * ibs_request->number_of_dependencies;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
}
- *fence = cs.out.handle;
-
- pthread_mutex_unlock(&context->sequence_mutex);
+ r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
+ &cs, sizeof(cs));
+ if (r)
+ goto error_unlock;
- free(chunk_array);
- return 0;
+ *fence = cs.out.handle;
error_unlock:
pthread_mutex_unlock(&context->sequence_mutex);
- free(chunk_array);
+ free(dependencies);
return r;
}
@@ -825,6 +348,7 @@ static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
uint32_t ring,
uint64_t handle,
uint64_t timeout_ns,
+ uint64_t flags,
bool *busy)
{
amdgpu_device_handle dev = context->dev;
@@ -836,9 +360,13 @@ static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
args.in.ip_type = ip;
args.in.ip_instance = ip_instance;
args.in.ring = ring;
- args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
args.in.ctx_id = context->id;
+ if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
+ args.in.timeout = timeout_ns;
+ else
+ args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
+
/* Handle errors manually here because of timeout */
r = ioctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
if (r == -1 && (errno == EINTR || errno == EAGAIN)) {
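[Editor's note] When AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE is set, the caller supplies the deadline that amdgpu_cs_calculate_timeout() would otherwise derive. A sketch of building one; that the deadline is nanoseconds on CLOCK_MONOTONIC is an assumption taken from the relative-timeout helper, not something this hunk states:

#include <stdint.h>
#include <time.h>

/* Sketch: turn "at most ns_from_now nanoseconds from now" into an absolute
 * value for amdgpu_cs_query_fence::timeout_ns when
 * AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE is set in ::flags. */
static uint64_t absolute_deadline(uint64_t ns_from_now)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        return (uint64_t)now.tv_sec * 1000000000ull + now.tv_nsec + ns_from_now;
}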
@@ -877,7 +405,7 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
ip_type = fence->ip_type;
ip_instance = fence->ip_instance;
ring = fence->ring;
- signaled_fence = context->fence_ib->cpu;
+ signaled_fence = context->fence_cpu;
signaled_fence += amdgpu_cs_fence_index(ip_type, ring);
expired_fence = &context->expired_fences[ip_type][ip_instance][ring];
*expired = false;
@@ -894,8 +422,6 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
/* This fence value is signaled already. */
*expired_fence = *signaled_fence;
pthread_mutex_unlock(&context->sequence_mutex);
- amdgpu_cs_pending_gc(context, ip_type, ip_instance, ring,
- fence->fence);
*expired = true;
return 0;
}
@@ -908,21 +434,17 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
pthread_mutex_unlock(&context->sequence_mutex);
r = amdgpu_ioctl_wait_cs(context, ip_type, ip_instance, ring,
- fence->fence, fence->timeout_ns, &busy);
+ fence->fence, fence->timeout_ns,
+ fence->flags, &busy);
if (!r && !busy) {
*expired = true;
pthread_mutex_lock(&context->sequence_mutex);
/* The thread doesn't hold sequence_mutex. Other thread could
update *expired_fence already. Check whether there is a
newerly expired fence. */
- if (fence->fence > *expired_fence) {
+ if (fence->fence > *expired_fence)
*expired_fence = fence->fence;
- pthread_mutex_unlock(&context->sequence_mutex);
- amdgpu_cs_pending_gc(context, ip_type, ip_instance,
- ring, fence->fence);
- } else {
- pthread_mutex_unlock(&context->sequence_mutex);
- }
+ pthread_mutex_unlock(&context->sequence_mutex);
}
return r;
diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c
index 72dd6bbd..e882aad2 100644
--- a/amdgpu/amdgpu_device.c
+++ b/amdgpu/amdgpu_device.c
@@ -101,7 +101,7 @@ static int fd_compare(void *key1, void *key2)
static int amdgpu_get_auth(int fd, int *auth)
{
int r = 0;
- drm_client_t client;
+ drm_client_t client = {};
if (drmGetNodeTypeFromFd(fd) == DRM_NODE_RENDER)
*auth = 0;
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index d46052e6..515ee78a 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -149,6 +149,7 @@ int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
dev->info.chip_external_rev = dev->dev_info.external_rev;
dev->info.family_id = dev->dev_info.family;
dev->info.max_engine_clk = dev->dev_info.max_engine_clock;
+ dev->info.max_memory_clk = dev->dev_info.max_memory_clock;
dev->info.gpu_counter_freq = dev->dev_info.gpu_counter_freq;
dev->info.enabled_rb_pipes_mask = dev->dev_info.enabled_rb_pipes_mask;
dev->info.rb_pipes = dev->dev_info.num_rb_pipes;
@@ -157,6 +158,9 @@ int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
dev->info.num_shader_engines = dev->dev_info.num_shader_engines;
dev->info.num_shader_arrays_per_engine =
dev->dev_info.num_shader_arrays_per_engine;
+ dev->info.vram_type = dev->dev_info.vram_type;
+ dev->info.vram_bit_width = dev->dev_info.vram_bit_width;
+ dev->info.ce_ram_size = dev->dev_info.ce_ram_size;
for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
@@ -226,7 +230,7 @@ int amdgpu_query_heap_info(amdgpu_device_handle dev,
uint32_t flags,
struct amdgpu_heap_info *info)
{
- struct drm_amdgpu_info_vram_gtt vram_gtt_info;
+ struct drm_amdgpu_info_vram_gtt vram_gtt_info = {};
int r;
r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_GTT,
@@ -272,3 +276,28 @@ int amdgpu_query_heap_info(amdgpu_device_handle dev,
return 0;
}
+
+int amdgpu_query_gds_info(amdgpu_device_handle dev,
+ struct amdgpu_gds_resource_info *gds_info)
+{
+ struct drm_amdgpu_info_gds gds_config = {};
+ int r;
+
+ if (gds_info == NULL)
+ return -EINVAL;
+
+ r = amdgpu_query_info(dev, AMDGPU_INFO_GDS_CONFIG,
+ sizeof(gds_config), &gds_config);
+ if (r)
+ return r;
+
+ gds_info->gds_gfx_partition_size = gds_config.gds_gfx_partition_size;
+ gds_info->compute_partition_size = gds_config.compute_partition_size;
+ gds_info->gds_total_size = gds_config.gds_total_size;
+ gds_info->gws_per_gfx_partition = gds_config.gws_per_gfx_partition;
+ gds_info->gws_per_compute_partition = gds_config.gws_per_compute_partition;
+ gds_info->oa_per_gfx_partition = gds_config.oa_per_gfx_partition;
+ gds_info->oa_per_compute_partition = gds_config.oa_per_compute_partition;
+
+ return 0;
+}
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
index c1cd4da7..ee1cb61c 100644
--- a/amdgpu/amdgpu_internal.h
+++ b/amdgpu/amdgpu_internal.h
@@ -97,44 +97,20 @@ struct amdgpu_bo_list {
uint32_t handle;
};
-/*
- * There are three mutexes.
- * To avoid deadlock, only hold the mutexes in this order:
- * sequence_mutex -> pendings_mutex -> pool_mutex.
-*/
struct amdgpu_context {
struct amdgpu_device *dev;
/** Mutex for accessing fences and to maintain command submissions
- and pending lists in good sequence. */
+ in good sequence. */
pthread_mutex_t sequence_mutex;
/** Buffer for user fences */
- struct amdgpu_ib *fence_ib;
+ struct amdgpu_bo *fence_bo;
+ void *fence_cpu;
/** The newest expired fence for the ring of the ip blocks. */
uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
- /** Mutex for accessing pendings list. */
- pthread_mutex_t pendings_mutex;
- /** Pending IBs. */
- struct list_head pendings[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
- /** Freed IBs not yet in pool */
- struct list_head freed;
- /** Mutex for accessing free ib pool. */
- pthread_mutex_t pool_mutex;
- /** Internal free IB pools. */
- struct list_head ib_pools[AMDGPU_CS_IB_SIZE_NUM];
/* context id*/
uint32_t id;
};
-struct amdgpu_ib {
- amdgpu_context_handle context;
- struct list_head list_node;
- amdgpu_bo_handle buf_handle;
- void *cpu;
- uint64_t virtual_mc_base_address;
- enum amdgpu_cs_ib_size ib_size;
- uint64_t cs_handle;
-};
-
/**
* Functions.
*/
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 1331159d..a3a025fe 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -55,7 +55,7 @@
#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info)
#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata)
#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle)
-#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, union drm_amdgpu_gem_va)
+#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va)
#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
@@ -139,16 +139,19 @@ union drm_amdgpu_bo_list {
#define AMDGPU_CTX_OP_FREE_CTX 2
#define AMDGPU_CTX_OP_QUERY_STATE 3
-#define AMDGPU_CTX_OP_STATE_RUNNING 1
-
/* GPU reset status */
#define AMDGPU_CTX_NO_RESET 0
-#define AMDGPU_CTX_GUILTY_RESET 1 /* this the context caused it */
-#define AMDGPU_CTX_INNOCENT_RESET 2 /* some other context caused it */
-#define AMDGPU_CTX_UNKNOWN_RESET 3 /* unknown cause */
+/* this context caused it */
+#define AMDGPU_CTX_GUILTY_RESET 1
+/* some other context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET 2
+/* unknown cause */
+#define AMDGPU_CTX_UNKNOWN_RESET 3
struct drm_amdgpu_ctx_in {
+ /** AMDGPU_CTX_OP_* */
uint32_t op;
+ /** For future use, no flags defined so far */
uint32_t flags;
uint32_t ctx_id;
uint32_t _pad;
@@ -161,6 +164,7 @@ union drm_amdgpu_ctx_out {
} alloc;
struct {
+ /** For future use, no flags defined so far */
uint64_t flags;
/** Number of resets caused by this context so far. */
uint32_t hangs;
@@ -187,7 +191,9 @@ union drm_amdgpu_ctx {
struct drm_amdgpu_gem_userptr {
uint64_t addr;
uint64_t size;
+ /* AMDGPU_GEM_USERPTR_* */
uint32_t flags;
+ /* Resulting GEM handle */
uint32_t handle;
};
@@ -219,23 +225,29 @@ struct drm_amdgpu_gem_userptr {
/** The same structure is shared for input/output */
struct drm_amdgpu_gem_metadata {
- uint32_t handle; /* GEM Object handle */
- uint32_t op; /** Do we want get or set metadata */
+ /** GEM Object handle */
+ uint32_t handle;
+	/** Do we want to get or set metadata */
+ uint32_t op;
struct {
+ /** For future use, no flags defined so far */
uint64_t flags;
- uint64_t tiling_info; /* family specific tiling info */
+ /** family specific tiling info */
+	/** Family-specific tiling info */
uint32_t data_size_bytes;
uint32_t data[64];
} data;
};
struct drm_amdgpu_gem_mmap_in {
- uint32_t handle; /** the GEM object handle */
+ /** the GEM object handle */
+ uint32_t handle;
uint32_t _pad;
};
struct drm_amdgpu_gem_mmap_out {
- uint64_t addr_ptr; /** mmap offset from the vma offset manager */
+ /** mmap offset from the vma offset manager */
+ uint64_t addr_ptr;
};
union drm_amdgpu_gem_mmap {
@@ -244,14 +256,19 @@ union drm_amdgpu_gem_mmap {
};
struct drm_amdgpu_gem_wait_idle_in {
- uint32_t handle; /* GEM object handle */
+ /** GEM object handle */
+ uint32_t handle;
+ /** For future use, no flags defined so far */
uint32_t flags;
- uint64_t timeout; /* Timeout to wait. If 0 then returned immediately with the status */
+ /** Absolute timeout to wait */
+ uint64_t timeout;
};
struct drm_amdgpu_gem_wait_idle_out {
- uint32_t status; /* BO status: 0 - BO is idle, 1 - BO is busy */
- uint32_t domain; /* Returned current memory domain */
+ /** BO status: 0 - BO is idle, 1 - BO is busy */
+ uint32_t status;
+ /** Returned current memory domain */
+ uint32_t domain;
};
union drm_amdgpu_gem_wait_idle {
@@ -260,7 +277,9 @@ union drm_amdgpu_gem_wait_idle {
};
struct drm_amdgpu_wait_cs_in {
+ /** Command submission handle */
uint64_t handle;
+ /** Absolute timeout to wait */
uint64_t timeout;
uint32_t ip_type;
uint32_t ip_instance;
@@ -269,6 +288,7 @@ struct drm_amdgpu_wait_cs_in {
};
struct drm_amdgpu_wait_cs_out {
+ /** CS status: 0 - CS completed, 1 - CS still busy */
uint64_t status;
};
@@ -277,23 +297,22 @@ union drm_amdgpu_wait_cs {
struct drm_amdgpu_wait_cs_out out;
};
+#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
+#define AMDGPU_GEM_OP_SET_PLACEMENT 1
+
/* Sets or returns a value associated with a buffer. */
struct drm_amdgpu_gem_op {
- uint32_t handle; /* buffer */
- uint32_t op; /* AMDGPU_GEM_OP_* */
- uint64_t value; /* input or return value */
+ /** GEM object handle */
+ uint32_t handle;
+ /** AMDGPU_GEM_OP_* */
+ uint32_t op;
+ /** Input or return value */
+ uint64_t value;
};
-#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
-#define AMDGPU_GEM_OP_SET_PLACEMENT 1
-
#define AMDGPU_VA_OP_MAP 1
#define AMDGPU_VA_OP_UNMAP 2
-#define AMDGPU_VA_RESULT_OK 0
-#define AMDGPU_VA_RESULT_ERROR 1
-#define AMDGPU_VA_RESULT_VA_INVALID_ALIGNMENT 2
-
/* Mapping flags */
/* readable mapping */
#define AMDGPU_VM_PAGE_READABLE (1 << 1)
@@ -302,33 +321,22 @@ struct drm_amdgpu_gem_op {
/* executable mapping, new for VI */
#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3)
-struct drm_amdgpu_gem_va_in {
- /* GEM object handle */
+struct drm_amdgpu_gem_va {
+ /** GEM object handle */
uint32_t handle;
uint32_t _pad;
- /* map or unmap*/
+ /** AMDGPU_VA_OP_* */
uint32_t operation;
- /* specify mapping flags */
+ /** AMDGPU_VM_PAGE_* */
uint32_t flags;
- /* va address to assign . Must be correctly aligned.*/
+	/** VA address to assign. Must be correctly aligned. */
uint64_t va_address;
- /* Specify offset inside of BO to assign. Must be correctly aligned.*/
+	/** Specify offset inside the BO to assign. Must be correctly aligned. */
uint64_t offset_in_bo;
- /* Specify mapping size. If 0 and offset is 0 then map the whole BO.*/
- /* Must be correctly aligned. */
+ /** Specify mapping size. Must be correctly aligned. */
uint64_t map_size;
};
-struct drm_amdgpu_gem_va_out {
- uint32_t result;
- uint32_t _pad;
-};
-
-union drm_amdgpu_gem_va {
- struct drm_amdgpu_gem_va_in in;
- struct drm_amdgpu_gem_va_out out;
-};
-
#define AMDGPU_HW_IP_GFX 0
#define AMDGPU_HW_IP_COMPUTE 1
#define AMDGPU_HW_IP_DMA 2
@@ -340,6 +348,8 @@ union drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_IB 0x01
#define AMDGPU_CHUNK_ID_FENCE 0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
+
struct drm_amdgpu_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
@@ -353,7 +363,7 @@ struct drm_amdgpu_cs_in {
uint32_t bo_list_handle;
uint32_t num_chunks;
uint32_t _pad;
- /* this points to uint64_t * which point to cs chunks */
+	/** Points to an array of uint64_t, each pointing to a CS chunk */
uint64_t chunks;
};
@@ -362,8 +372,8 @@ struct drm_amdgpu_cs_out {
};
union drm_amdgpu_cs {
- struct drm_amdgpu_cs_in in;
- struct drm_amdgpu_cs_out out;
+ struct drm_amdgpu_cs_in in;
+ struct drm_amdgpu_cs_out out;
};
/* Specify flags to be used for IB */
@@ -371,24 +381,31 @@ union drm_amdgpu_cs {
/* This IB should be submitted to CE */
#define AMDGPU_IB_FLAG_CE (1<<0)
-/* GDS is used by this IB */
-#define AMDGPU_IB_FLAG_GDS (1<<1)
-
/* CE Preamble */
-#define AMDGPU_IB_FLAG_PREAMBLE (1<<2)
+#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
struct drm_amdgpu_cs_chunk_ib {
- /**
- * Handle of GEM object to be used as IB or 0 if it is already in
- * residency list.
- */
- uint32_t handle;
- uint32_t flags; /* IB Flags */
- uint64_t va_start; /* Virtual address to begin IB execution */
- uint32_t ib_bytes; /* Size of submission */
- uint32_t ip_type; /* HW IP to submit to */
- uint32_t ip_instance; /* HW IP index of the same type to submit to */
- uint32_t ring; /* Ring index to submit to */
+ uint32_t _pad;
+ /** AMDGPU_IB_FLAG_* */
+ uint32_t flags;
+ /** Virtual address to begin IB execution */
+ uint64_t va_start;
+ /** Size of submission */
+ uint32_t ib_bytes;
+ /** HW IP to submit to */
+ uint32_t ip_type;
+ /** HW IP index of the same type to submit to */
+ uint32_t ip_instance;
+ /** Ring index to submit to */
+ uint32_t ring;
+};
+
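+/* Identifies a fence from another submission (context, IP, ring and handle)
+ * that a CS using AMDGPU_CHUNK_ID_DEPENDENCIES must wait on before it runs. */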
+struct drm_amdgpu_cs_chunk_dep {
+ uint32_t ip_type;
+ uint32_t ip_instance;
+ uint32_t ring;
+ uint32_t ctx_id;
+ uint64_t handle;
};
struct drm_amdgpu_cs_chunk_fence {
@@ -483,23 +500,28 @@ struct drm_amdgpu_info {
/** AMDGPU_HW_IP_* */
uint32_t type;
/**
- * Index of the IP if there are more IPs of the same type.
- * Ignored by AMDGPU_INFO_HW_IP_COUNT.
+ * Index of the IP if there are more IPs of the same
+ * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
*/
uint32_t ip_instance;
} query_hw_ip;
struct {
uint32_t dword_offset;
- uint32_t count; /* number of registers to read */
+ /** number of registers to read */
+ uint32_t count;
uint32_t instance;
+ /** For future use, no flags defined so far */
uint32_t flags;
} read_mmr_reg;
struct {
/** AMDGPU_INFO_FW_* */
uint32_t fw_type;
- /** Index of the IP if there are more IPs of the same type. */
+ /**
+ * Index of the IP if there are more IPs of
+ * the same type.
+ */
uint32_t ip_instance;
/**
* Index of the engine. Whether this is used depends
@@ -540,6 +562,15 @@ struct drm_amdgpu_info_firmware {
uint32_t feature;
};
+#define AMDGPU_VRAM_TYPE_UNKNOWN 0
+#define AMDGPU_VRAM_TYPE_GDDR1 1
+#define AMDGPU_VRAM_TYPE_DDR2 2
+#define AMDGPU_VRAM_TYPE_GDDR3 3
+#define AMDGPU_VRAM_TYPE_GDDR4 4
+#define AMDGPU_VRAM_TYPE_GDDR5 5
+#define AMDGPU_VRAM_TYPE_HBM 6
+#define AMDGPU_VRAM_TYPE_DDR3 7
+
struct drm_amdgpu_info_device {
/** PCI Device ID */
uint32_t device_id;
@@ -551,8 +582,10 @@ struct drm_amdgpu_info_device {
uint32_t family;
uint32_t num_shader_engines;
uint32_t num_shader_arrays_per_engine;
- uint32_t gpu_counter_freq; /* in KHz */
- uint64_t max_engine_clock; /* in KHz */
+ /* in KHz */
+ uint32_t gpu_counter_freq;
+ uint64_t max_engine_clock;
+ uint64_t max_memory_clock;
/* cu information */
uint32_t cu_active_number;
uint32_t cu_ao_mask;
@@ -572,6 +605,12 @@ struct drm_amdgpu_info_device {
/** Page table entry - fragment size */
uint32_t pte_fragment_size;
uint32_t gart_page_size;
+	/** Constant engine RAM size */
+ uint32_t ce_ram_size;
+	/** Video memory type info */
+ uint32_t vram_type;
+	/** Video memory bit width */
+ uint32_t vram_bit_width;
};
struct drm_amdgpu_info_hw_ip {
@@ -580,6 +619,10 @@ struct drm_amdgpu_info_hw_ip {
uint32_t hw_ip_version_minor;
/** Capabilities */
uint64_t capabilities_flags;
+	/** Command buffer address start alignment */
+ uint32_t ib_start_alignment;
+	/** Command buffer size alignment */
+ uint32_t ib_size_alignment;
/** Bitmask of available rings. Bit 0 means ring 0, etc. */
uint32_t available_rings;
uint32_t _pad;
diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
index 0062bd7c..dd3b8bc2 100644
--- a/tests/amdgpu/amdgpu_test.h
+++ b/tests/amdgpu/amdgpu_test.h
@@ -131,4 +131,42 @@ static inline amdgpu_bo_handle gpu_mem_alloc(
return res.buf_handle;
}
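+/* Allocate a buffer in the requested heap, CPU map it and return the BO
+ * handle, CPU pointer and GPU (MC) virtual address in one call. */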
+static inline int
+amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
+ unsigned alignment, unsigned heap, uint64_t flags,
+ amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address)
+{
+ struct amdgpu_bo_alloc_request request = {};
+ struct amdgpu_bo_alloc_result out;
+ int r;
+
+ request.alloc_size = size;
+ request.phys_alignment = alignment;
+ request.preferred_heap = heap;
+ request.flags = flags;
+
+ r = amdgpu_bo_alloc(dev, &request, &out);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_cpu_map(out.buf_handle, cpu);
+ if (r) {
+ amdgpu_bo_free(out.buf_handle);
+ return r;
+ }
+
+ *bo = out.buf_handle;
+ *mc_address = out.virtual_mc_base_address;
+ return 0;
+}
+
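+/* Build a BO list from one or two buffer handles; bo2 may be NULL. */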
+static inline int
+amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
+ amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list)
+{
+ amdgpu_bo_handle resources[] = {bo1, bo2};
+
+ return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
+}
+
#endif /* #ifdef _AMDGPU_TEST_H_ */
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index 67a8d3c6..93743998 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -156,48 +156,59 @@ static void amdgpu_memory_alloc(void)
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
amdgpu_context_handle context_handle;
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
- struct amdgpu_cs_ib_alloc_result ib_result_ce = {0};
+ amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
+ void *ib_result_cpu, *ib_result_ce_cpu;
+ uint64_t ib_result_mc_address, ib_result_ce_mc_address;
struct amdgpu_cs_request ibs_request = {0};
struct amdgpu_cs_ib_info ib_info[2];
struct amdgpu_cs_query_fence fence_status = {0};
uint32_t *ptr;
uint32_t expired;
+ amdgpu_bo_list_handle bo_list;
int r;
r = amdgpu_cs_ctx_create(device_handle, &context_handle);
CU_ASSERT_EQUAL(r, 0);
- r = amdgpu_cs_alloc_ib(context_handle,
- amdgpu_cs_ib_size_4K, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_handle, &ib_result_cpu,
+ &ib_result_mc_address);
CU_ASSERT_EQUAL(r, 0);
- r = amdgpu_cs_alloc_ib(context_handle,
- amdgpu_cs_ib_size_4K, &ib_result_ce);
+ r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_ce_handle, &ib_result_ce_cpu,
+ &ib_result_ce_mc_address);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_get_bo_list(device_handle, ib_result_handle,
+ ib_result_ce_handle, &bo_list);
CU_ASSERT_EQUAL(r, 0);
memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
/* IT_SET_CE_DE_COUNTERS */
- ptr = ib_result_ce.cpu;
+ ptr = ib_result_ce_cpu;
ptr[0] = 0xc0008900;
ptr[1] = 0;
ptr[2] = 0xc0008400;
ptr[3] = 1;
- ib_info[0].ib_handle = ib_result_ce.handle;
+ ib_info[0].ib_mc_address = ib_result_ce_mc_address;
ib_info[0].size = 4;
ib_info[0].flags = AMDGPU_IB_FLAG_CE;
/* IT_WAIT_ON_CE_COUNTER */
- ptr = ib_result.cpu;
+ ptr = ib_result_cpu;
ptr[0] = 0xc0008600;
ptr[1] = 0x00000001;
- ib_info[1].ib_handle = ib_result.handle;
+ ib_info[1].ib_mc_address = ib_result_mc_address;
ib_info[1].size = 2;
ibs_request.ip_type = AMDGPU_HW_IP_GFX;
ibs_request.number_of_ibs = 2;
ibs_request.ibs = ib_info;
+ ibs_request.resources = bo_list;
r = amdgpu_cs_submit(context_handle, 0,
&ibs_request, 1, &fence_status.fence);
@@ -210,6 +221,15 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
r = amdgpu_cs_query_fence_status(&fence_status, &expired);
CU_ASSERT_EQUAL(r, 0);
+ r = amdgpu_bo_free(ib_result_handle);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_bo_free(ib_result_ce_handle);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_bo_list_destroy(bo_list);
+ CU_ASSERT_EQUAL(r, 0);
+
r = amdgpu_cs_ctx_free(context_handle);
CU_ASSERT_EQUAL(r, 0);
}
@@ -217,43 +237,52 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
static void amdgpu_command_submission_gfx_shared_ib(void)
{
amdgpu_context_handle context_handle;
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
+ amdgpu_bo_handle ib_result_handle;
+ void *ib_result_cpu;
+ uint64_t ib_result_mc_address;
struct amdgpu_cs_request ibs_request = {0};
struct amdgpu_cs_ib_info ib_info[2];
struct amdgpu_cs_query_fence fence_status = {0};
uint32_t *ptr;
uint32_t expired;
+ amdgpu_bo_list_handle bo_list;
int r;
r = amdgpu_cs_ctx_create(device_handle, &context_handle);
CU_ASSERT_EQUAL(r, 0);
- r = amdgpu_cs_alloc_ib(context_handle,
- amdgpu_cs_ib_size_4K, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_handle, &ib_result_cpu,
+ &ib_result_mc_address);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+ &bo_list);
CU_ASSERT_EQUAL(r, 0);
memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
/* IT_SET_CE_DE_COUNTERS */
- ptr = ib_result.cpu;
+ ptr = ib_result_cpu;
ptr[0] = 0xc0008900;
ptr[1] = 0;
ptr[2] = 0xc0008400;
ptr[3] = 1;
- ib_info[0].ib_handle = ib_result.handle;
+ ib_info[0].ib_mc_address = ib_result_mc_address;
ib_info[0].size = 4;
ib_info[0].flags = AMDGPU_IB_FLAG_CE;
- ptr = (uint32_t *)ib_result.cpu + 4;
+ ptr = (uint32_t *)ib_result_cpu + 4;
ptr[0] = 0xc0008600;
ptr[1] = 0x00000001;
- ib_info[1].ib_handle = ib_result.handle;
+ ib_info[1].ib_mc_address = ib_result_mc_address + 16;
ib_info[1].size = 2;
- ib_info[1].offset_dw = 4;
ibs_request.ip_type = AMDGPU_HW_IP_GFX;
ibs_request.number_of_ibs = 2;
ibs_request.ibs = ib_info;
+ ibs_request.resources = bo_list;
r = amdgpu_cs_submit(context_handle, 0,
&ibs_request, 1, &fence_status.fence);
@@ -266,6 +295,12 @@ static void amdgpu_command_submission_gfx_shared_ib(void)
r = amdgpu_cs_query_fence_status(&fence_status, &expired);
CU_ASSERT_EQUAL(r, 0);
+ r = amdgpu_bo_free(ib_result_handle);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_bo_list_destroy(bo_list);
+ CU_ASSERT_EQUAL(r, 0);
+
r = amdgpu_cs_ctx_free(context_handle);
CU_ASSERT_EQUAL(r, 0);
}
@@ -281,29 +316,37 @@ static void amdgpu_command_submission_gfx(void)
static void amdgpu_command_submission_compute(void)
{
amdgpu_context_handle context_handle;
- struct amdgpu_cs_ib_alloc_result ib_result;
+ amdgpu_bo_handle ib_result_handle;
+ void *ib_result_cpu;
+ uint64_t ib_result_mc_address;
struct amdgpu_cs_request ibs_request;
struct amdgpu_cs_ib_info ib_info;
struct amdgpu_cs_query_fence fence_status;
uint32_t *ptr;
uint32_t expired;
int i, r, instance;
+ amdgpu_bo_list_handle bo_list;
r = amdgpu_cs_ctx_create(device_handle, &context_handle);
CU_ASSERT_EQUAL(r, 0);
for (instance = 0; instance < 8; instance++) {
- memset(&ib_result, 0, sizeof(struct amdgpu_cs_ib_alloc_result));
- r = amdgpu_cs_alloc_ib(context_handle,
- amdgpu_cs_ib_size_4K, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_handle, &ib_result_cpu,
+ &ib_result_mc_address);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+ &bo_list);
CU_ASSERT_EQUAL(r, 0);
- ptr = ib_result.cpu;
+ ptr = ib_result_cpu;
for (i = 0; i < 16; ++i)
ptr[i] = 0xffff1000;
memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
- ib_info.ib_handle = ib_result.handle;
+ ib_info.ib_mc_address = ib_result_mc_address;
ib_info.size = 16;
memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
@@ -311,6 +354,7 @@ static void amdgpu_command_submission_compute(void)
ibs_request.ring = instance;
ibs_request.number_of_ibs = 1;
ibs_request.ibs = &ib_info;
+ ibs_request.resources = bo_list;
memset(&fence_status, 0, sizeof(struct amdgpu_cs_query_fence));
r = amdgpu_cs_submit(context_handle, 0,
@@ -324,6 +368,12 @@ static void amdgpu_command_submission_compute(void)
r = amdgpu_cs_query_fence_status(&fence_status, &expired);
CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_bo_list_destroy(bo_list);
+ CU_ASSERT_EQUAL(r, 0);
+
+ r = amdgpu_bo_free(ib_result_handle);
+ CU_ASSERT_EQUAL(r, 0);
}
r = amdgpu_cs_ctx_free(context_handle);
@@ -341,11 +391,14 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
struct amdgpu_cs_ib_info *ib_info,
struct amdgpu_cs_request *ibs_request)
{
- int r, i, j;
+ int r;
uint32_t expired;
uint32_t *ring_ptr;
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
+ amdgpu_bo_handle ib_result_handle;
+ void *ib_result_cpu;
+ uint64_t ib_result_mc_address;
struct amdgpu_cs_query_fence fence_status = {0};
+ amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
/* prepare CS */
CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
@@ -355,15 +408,17 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
CU_ASSERT_TRUE(pm4_dw <= 1024);
/* allocate IB */
- r = amdgpu_cs_alloc_ib(context_handle,
- amdgpu_cs_ib_size_4K, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_handle, &ib_result_cpu,
+ &ib_result_mc_address);
CU_ASSERT_EQUAL(r, 0);
/* copy PM4 packet to ring from caller */
- ring_ptr = ib_result.cpu;
+ ring_ptr = ib_result_cpu;
memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
- ib_info->ib_handle = ib_result.handle;
+ ib_info->ib_mc_address = ib_result_mc_address;
ib_info->size = pm4_dw;
ibs_request->ip_type = AMDGPU_HW_IP_DMA;
@@ -371,7 +426,10 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
ibs_request->number_of_ibs = 1;
ibs_request->ibs = ib_info;
- r = amdgpu_bo_list_create(device_handle, res_cnt, resources,
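+	/* Add the IB buffer itself to the caller's resource list so it is
+	 * included in the submission's BO list. */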
+ memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
+ all_res[res_cnt] = ib_result_handle;
+
+ r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
NULL, &ibs_request->resources);
CU_ASSERT_EQUAL(r, 0);
@@ -394,6 +452,9 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
r = amdgpu_cs_query_fence_status(&fence_status, &expired);
CU_ASSERT_EQUAL(r, 0);
CU_ASSERT_EQUAL(expired, true);
+
+ r = amdgpu_bo_free(ib_result_handle);
+ CU_ASSERT_EQUAL(r, 0);
}
static void amdgpu_command_submission_sdma_write_linear(void)
diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 6d485ae3..63cdbb5b 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -31,7 +31,7 @@
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
-#define IB_SIZE amdgpu_cs_ib_size_4K
+#define IB_SIZE 4096
#define MAX_RESOURCES 16
static amdgpu_device_handle device_handle;
@@ -40,8 +40,9 @@ static uint32_t minor_version;
static uint32_t family_id;
static amdgpu_context_handle context_handle;
-static amdgpu_ib_handle ib_handle;
-uint32_t *ib_cpu;
+static amdgpu_bo_handle ib_handle;
+static uint64_t ib_mc_address;
+static uint32_t *ib_cpu;
static amdgpu_bo_handle resources[MAX_RESOURCES];
static unsigned num_resources;
@@ -59,7 +60,9 @@ CU_TestInfo cs_tests[] = {
int suite_cs_tests_init(void)
{
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
+ amdgpu_bo_handle ib_result_handle;
+ void *ib_result_cpu;
+ uint64_t ib_result_mc_address;
int r;
r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
@@ -73,12 +76,16 @@ int suite_cs_tests_init(void)
if (r)
return CUE_SINIT_FAILED;
- r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_result_handle, &ib_result_cpu,
+ &ib_result_mc_address);
if (r)
return CUE_SINIT_FAILED;
- ib_handle = ib_result.handle;
- ib_cpu = ib_result.cpu;
+ ib_handle = ib_result_handle;
+ ib_mc_address = ib_result_mc_address;
+ ib_cpu = ib_result_cpu;
return CUE_SUCCESS;
}
@@ -87,7 +94,7 @@ int suite_cs_tests_clean(void)
{
int r;
- r = amdgpu_cs_free_ib(ib_handle);
+ r = amdgpu_bo_free(ib_handle);
if (r)
return CUE_SCLEAN_FAILED;
@@ -104,14 +111,13 @@ int suite_cs_tests_clean(void)
static int submit(unsigned ndw, unsigned ip)
{
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
struct amdgpu_cs_request ibs_request = {0};
struct amdgpu_cs_ib_info ib_info = {0};
struct amdgpu_cs_query_fence fence_status = {0};
uint32_t expired;
int r;
- ib_info.ib_handle = ib_handle;
+ ib_info.ib_mc_address = ib_mc_address;
ib_info.size = ndw;
ibs_request.ip_type = ip;
@@ -133,13 +139,6 @@ static int submit(unsigned ndw, unsigned ip)
if (r)
return r;
- r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result);
- if (r)
- return r;
-
- ib_handle = ib_result.handle;
- ib_cpu = ib_result.cpu;
-
fence_status.context = context_handle;
fence_status.timeout_ns = AMDGPU_TIMEOUT_INFINITE;
fence_status.ip_type = ip;
@@ -186,6 +185,7 @@ static void amdgpu_cs_uvd_create(void)
num_resources = 0;
resources[num_resources++] = res.buf_handle;
+ resources[num_resources++] = ib_handle;
i = 0;
uvd_cmd(res.virtual_mc_base_address, 0x0, &i);
@@ -247,6 +247,7 @@ static void amdgpu_cs_uvd_decode(void)
num_resources = 0;
resources[num_resources++] = res.buf_handle;
+ resources[num_resources++] = ib_handle;
msg_addr = res.virtual_mc_base_address;
fb_addr = msg_addr + 4*1024;
@@ -311,6 +312,7 @@ static void amdgpu_cs_uvd_destroy(void)
num_resources = 0;
resources[num_resources++] = res.buf_handle;
+ resources[num_resources++] = ib_handle;
i = 0;
uvd_cmd(res.virtual_mc_base_address, 0x0, &i);
diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c
index aaa29f6e..70e378cd 100644
--- a/tests/amdgpu/vce_tests.c
+++ b/tests/amdgpu/vce_tests.c
@@ -35,7 +35,7 @@
#include "vce_ib.h"
#include "frame.h"
-#define IB_SIZE amdgpu_cs_ib_size_4K
+#define IB_SIZE 4096
#define MAX_RESOURCES 16
struct amdgpu_vce_bo {
@@ -61,12 +61,13 @@ static uint32_t minor_version;
static uint32_t family_id;
static amdgpu_context_handle context_handle;
-static amdgpu_ib_handle ib_handle;
-uint32_t *ib_cpu;
+static amdgpu_bo_handle ib_handle;
+static uint64_t ib_mc_address;
+static uint32_t *ib_cpu;
-struct amdgpu_vce_encode enc;
-amdgpu_bo_handle resources[MAX_RESOURCES];
-unsigned num_resources;
+static struct amdgpu_vce_encode enc;
+static amdgpu_bo_handle resources[MAX_RESOURCES];
+static unsigned num_resources;
static void amdgpu_cs_vce_create(void);
static void amdgpu_cs_vce_encode(void);
@@ -81,7 +82,6 @@ CU_TestInfo vce_tests[] = {
int suite_vce_tests_init(void)
{
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
int r;
r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
@@ -95,13 +95,13 @@ int suite_vce_tests_init(void)
if (r)
return CUE_SINIT_FAILED;
- r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_handle, (void**)&ib_cpu,
+ &ib_mc_address);
if (r)
return CUE_SINIT_FAILED;
- ib_handle = ib_result.handle;
- ib_cpu = ib_result.cpu;
-
memset(&enc, 0, sizeof(struct amdgpu_vce_encode));
return CUE_SUCCESS;
@@ -111,7 +111,7 @@ int suite_vce_tests_clean(void)
{
int r;
- r = amdgpu_cs_free_ib(ib_handle);
+ r = amdgpu_bo_free(ib_handle);
if (r)
return CUE_SCLEAN_FAILED;
@@ -128,14 +128,13 @@ int suite_vce_tests_clean(void)
static int submit(unsigned ndw, unsigned ip)
{
- struct amdgpu_cs_ib_alloc_result ib_result = {0};
struct amdgpu_cs_request ibs_request = {0};
struct amdgpu_cs_ib_info ib_info = {0};
struct amdgpu_cs_query_fence fence_status = {0};
uint32_t expired;
int r;
- ib_info.ib_handle = ib_handle;
+ ib_info.ib_mc_address = ib_mc_address;
ib_info.size = ndw;
ibs_request.ip_type = ip;
@@ -157,12 +156,13 @@ static int submit(unsigned ndw, unsigned ip)
if (r)
return r;
- r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result);
+ r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &ib_handle, (void**)&ib_cpu,
+ &ib_mc_address);
if (r)
return r;
-
- ib_handle = ib_result.handle;
- ib_cpu = ib_result.cpu;
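+	/* The last resource slot always holds the current IB; point it at the
+	 * newly allocated buffer. */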
+ resources[num_resources-1] = ib_handle;
fence_status.context = context_handle;
fence_status.timeout_ns = AMDGPU_TIMEOUT_INFINITE;
@@ -204,6 +204,7 @@ static void amdgpu_cs_vce_create(void)
num_resources = 0;
alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
resources[num_resources++] = enc.fb[0].handle;
+ resources[num_resources++] = ib_handle;
len = 0;
memcpy(ib_cpu, vce_session, sizeof(vce_session));
@@ -374,7 +375,6 @@ static void amdgpu_cs_vce_encode(void)
vbuf_size = enc.width * enc.height * 1.5;
cpb_size = vbuf_size * 10;
-
num_resources = 0;
alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
resources[num_resources++] = enc.fb[0].handle;
@@ -388,6 +388,7 @@ static void amdgpu_cs_vce_encode(void)
resources[num_resources++] = enc.vbuf.handle;
alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM);
resources[num_resources++] = enc.cpb.handle;
+ resources[num_resources++] = ib_handle;
r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
CU_ASSERT_EQUAL(r, 0);
@@ -426,7 +427,7 @@ static void amdgpu_cs_vce_encode(void)
check_result(&enc);
}
- for (i = 0; i < num_resources; ++i) {
+ for (i = 0; i < num_resources-1; ++i) {
r = amdgpu_bo_free(resources[i]);
CU_ASSERT_EQUAL(r, 0);
}
@@ -439,6 +440,7 @@ static void amdgpu_cs_vce_destroy(void)
num_resources = 0;
alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
resources[num_resources++] = enc.fb[0].handle;
+ resources[num_resources++] = ib_handle;
len = 0;
memcpy(ib_cpu, vce_session, sizeof(vce_session));