diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-10-04 10:23:30 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-10-04 10:58:11 +0300 |
commit | c214a227dc9af42050f44c7f55498f37449dd574 (patch) | |
tree | b246f90ff6e0f0405097eac1a74b72de07896098 /hsakmt/include | |
parent | 369325d9d551eb8f16279eb4c562c05223b4d5bc (diff) |
restore original paths
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'hsakmt/include')
-rw-r--r-- | hsakmt/include/hsakmt.h | 577 | ||||
-rw-r--r-- | hsakmt/include/hsakmttypes.h | 909 | ||||
-rw-r--r-- | hsakmt/include/linux/kfd_ioctl.h | 292 |
3 files changed, 1778 insertions, 0 deletions
diff --git a/hsakmt/include/hsakmt.h b/hsakmt/include/hsakmt.h new file mode 100644 index 0000000..c87b3f8 --- /dev/null +++ b/hsakmt/include/hsakmt.h @@ -0,0 +1,577 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HSAKMT_H_ +#define _HSAKMT_H_ + +#include "hsakmttypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + "Opens" the HSA kernel driver for user-kernel mode communication. + + On Windows, this function gets a handle to the KFD's AMDKFDIO device object that + is responsible for user-kernel communication, this handle is used internally by + the thunk library to send device I/O control to the HSA kernel driver. + No other thunk library function may be called unless the user-kernel communication + channel is opened first. 
+ + On Linux this call opens the "/dev/kfd" device file to establish a communication + path to the kernel. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtOpenKFD( void ); + +/** + "Closes" the user-kernel communication path. + + On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed; + no other communication with the kernel driver is possible after the successful + execution of the hsaKmtCloseKFD() function. Depending on the failure reason, + the user-kernel communication path may or may not be still active. + + On Linux the function closes the "/dev/kfd" device file. + No further communication to the kernel driver is allowed until hsaKmtOpenKFD() + function is called again. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtCloseKFD( void ); + + +/** + Returns the user-kernel interface version supported by KFD. + Higher major numbers usually add new features to KFD and may break user-kernel + compatibility; higher minor numbers define additional functionality associated + within a major number. + The calling software should validate that it meets the minimum interface version + as described in the API specification. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetVersion( + HsaVersionInfo* VersionInfo //OUT + ); + +/** + The function takes a "snapshot" of the topology information within the KFD + to avoid any changes during the enumeration process. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtAcquireSystemProperties( + HsaSystemProperties* SystemProperties //OUT + ); + +/** + Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties() +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtReleaseSystemProperties( void ) ; + +/** + Retrieves the discoverable sub-properties for a given HSA + node. The parameters returned allow the application or runtime to size the + management structures necessary to store the information. 
+*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetNodeProperties( + HSAuint32 NodeId, //IN + HsaNodeProperties* NodeProperties //OUT + ); + +/** + Retrieves the memory properties of a specific HSA node. + the memory pointer passed as MemoryProperties is sized as + NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the + hsaKmtGetNodeProperties() call. + + Some of the data returned is optional. Not all implementations may return all + parameters in the hsaMemoryProperties. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetNodeMemoryProperties( + HSAuint32 NodeId, //IN + HSAuint32 NumBanks, //IN + HsaMemoryProperties* MemoryProperties //OUT + ); + +/** + Retrieves the cache properties of a specific HSA node and processor ID. + ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier + via the hsaKmtGetNodeProperties() call. + The memory pointer passed as CacheProperties is sized as + NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the + hsaKmtGetNodeProperties() call. + + The data returned is optional. Not all implementations may return all + parameters in the CacheProperties. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetNodeCacheProperties( + HSAuint32 NodeId, //IN + HSAuint32 ProcessorId, //IN + HSAuint32 NumCaches, //IN + HsaCacheProperties* CacheProperties //OUT + ); + +/** + Retrieves the HSA IO affinity properties of a specific HSA node. + the memory pointer passed as Properties is sized as + NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the + hsaKmtGetNodeProperties() call. + + The data returned is optional. Not all implementations may return all + parameters in the IoLinkProperties. 
+*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetNodeIoLinkProperties( + HSAuint32 NodeId, //IN + HSAuint32 NumIoLinks, //IN + HsaIoLinkProperties* IoLinkProperties //OUT + ); + + + +/** + Creates an operating system event associated with a HSA event ID +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtCreateEvent( + HsaEventDescriptor* EventDesc, //IN + bool ManualReset, //IN + bool IsSignaled, //IN + HsaEvent** Event //OUT + ); + +/** + Destroys an operating system event associated with a HSA event ID +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDestroyEvent( + HsaEvent* Event //IN + ); + +/** + Sets the specified event object to the signaled state +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtSetEvent( + HsaEvent* Event //IN + ); + +/** + Sets the specified event object to the non-signaled state +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtResetEvent( + HsaEvent* Event //IN + ); + +/** + Queries the state of the specified event object +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtQueryEventState( + HsaEvent* Event //IN + ); + +/** + Checks the current state of the event object. If the object's state is + nonsignaled, the calling thread enters the wait state. + + The function returns when one of the following occurs: +- The specified event object is in the signaled state. +- The time-out interval elapses. +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtWaitOnEvent( + HsaEvent* Event, //IN + HSAuint32 Milliseconds //IN + ); + +/** + Checks the current state of multiple event objects. + + The function returns when one of the following occurs: +- Either any one or all of the specified objects are in the signaled state + - if "WaitOnAll" is "true" the function returns when the state of all + objects in array is signaled + - if "WaitOnAll" is "false" the function returns when the state of any + one of the objects is set to signaled +- The time-out interval elapses. 
+*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtWaitOnMultipleEvents( + HsaEvent* Events[], //IN + HSAuint32 NumEvents, //IN + bool WaitOnAll, //IN + HSAuint32 Milliseconds //IN + ); + +/** + new TEMPORARY function definition - to be used only on "Trinity + Southern Islands" platform + If used on other platforms the function will return HSAKMT_STATUS_ERROR +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtReportQueue( + HSA_QUEUEID QueueId, //IN + HsaQueueReport* QueueReport //OUT + ); + +/** + Creates a GPU queue with user-mode access rights +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtCreateQueue( + HSAuint32 NodeId, //IN + HSA_QUEUE_TYPE Type, //IN + HSAuint32 QueuePercentage, //IN + HSA_QUEUE_PRIORITY Priority, //IN + void* QueueAddress, //IN + HSAuint64 QueueSizeInBytes, //IN + HsaEvent* Event, //IN + HsaQueueResource* QueueResource //OUT + ); + +/** + Updates a queue +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtUpdateQueue( + HSA_QUEUEID QueueId, //IN + HSAuint32 QueuePercentage,//IN + HSA_QUEUE_PRIORITY Priority, //IN + void* QueueAddress, //IN + HSAuint64 QueueSize, //IN + HsaEvent* Event //IN + ); + +/** + Destroys a queue +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDestroyQueue( + HSA_QUEUEID QueueId //IN + ); + +/** + Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch. 
+*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtSetMemoryPolicy( + HSAuint32 Node, //IN + HSAuint32 DefaultPolicy, //IN + HSAuint32 AlternatePolicy, //IN + void* MemoryAddressAlternate, //IN (page-aligned) + HSAuint64 MemorySizeInBytes //IN (page-aligned) + ); +/** + Allocates a memory buffer that may be accessed by the GPU +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtAllocMemory( + HSAuint32 PreferredNode, //IN + HSAuint64 SizeInBytes, //IN (multiple of page size) + HsaMemFlags MemFlags, //IN + void** MemoryAddress //OUT (page-aligned) + ); + +/** + Frees a memory buffer +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtFreeMemory( + void* MemoryAddress, //IN (page-aligned) + HSAuint64 SizeInBytes //IN + ); + +/** + Registers with KFD a memory buffer that may be accessed by the GPU + This function will never be required for Linux +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtRegisterMemory( + void* MemoryAddress, //IN (page-aligned) + HSAuint64 MemorySizeInBytes //IN (page-aligned) + ); + + +/** + Unregisters with KFD a memory buffer + This function will never be required for Linux +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDeregisterMemory( + void* MemoryAddress //IN + ); + + +/** + Ensures that the memory is resident and can be accessed by GPU +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMapMemoryToGPU( + void* MemoryAddress, //IN (page-aligned) + HSAuint64 MemorySizeInBytes, //IN (page-aligned) + HSAuint64* AlternateVAGPU //OUT (page-aligned) + ); + +/** + Releases the residency of the memory +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtUnmapMemoryToGPU( + void* MemoryAddress //IN (page-aligned) + ); + + +/** + Notifies the kernel driver that a process wants to use GPU debugging facilities +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDbgRegister( + HSAuint32 NodeId //IN + ); + +/** + Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDbgUnregister( + HSAuint32 NodeId //IN + ); + +/** + Controls a wavefront +*/ + +HSAKMT_STATUS +HSAKMTAPI 
+hsaKmtDbgWavefrontControl( + HSAuint32 NodeId, //IN + HSA_DBG_WAVEOP Operand, //IN + HSA_DBG_WAVEMODE Mode, //IN + HSAuint32 TrapId, //IN + HsaDbgWaveMessage* DbgWaveMsgRing //IN + ); + +/** + Sets watch points on memory address ranges to generate exception events when the + watched addresses are accessed +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtDbgAddressWatch( + HSAuint32 NodeId, //IN + HSAuint32 NumWatchPoints, //IN + HSA_DBG_WATCH_MODE WatchMode[], //IN + void* WatchAddress[], //IN + HSAuint64 WatchMask[], //IN, optional + HsaEvent* WatchEvent[] //IN, optional + ); + +/** + Gets GPU and CPU clock counters for particular Node +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetClockCounters( + HSAuint32 NodeId, //IN + HsaClockCounters* Counters //OUT + ); + +/** + Retrieves information on the available HSA counters +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcGetCounterProperties( + HSAuint32 NodeId, //IN + HsaCounterProperties** CounterProperties //OUT + ); + +/** + Registers a set of (HW) counters to be used for tracing/profiling +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcRegisterTrace( + HSAuint32 NodeId, //IN + HSAuint32 NumberOfCounters, //IN + HsaCounter* Counters, //IN + HsaPmcTraceRoot* TraceRoot //OUT + ); + +/** + Unregisters a set of (HW) counters used for tracing/profiling +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcUnregisterTrace( + HSAuint32 NodeId, //IN + HSATraceId TraceId //IN + ); + +/** + Allows a user mode process to get exclusive access to the defined set of (HW) counters + used for tracing/profiling +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcAcquireTraceAccess( + HSAuint32 NodeId, //IN + HSATraceId TraceId //IN + ); + +/** + Allows a user mode process to release exclusive access to the defined set of (HW) counters + used for tracing/profiling +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcReleaseTraceAccess( + HSAuint32 NodeId, //IN + HSATraceId TraceId //IN + ); + +/** + Starts tracing operation on a previously established set of performance counters +*/ + 
+HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcStartTrace( + HSATraceId TraceId, //IN + void* TraceBuffer, //IN (page aligned) + HSAuint64 TraceBufferSizeBytes //IN (page aligned) + ); + +/** + Forces an update of all the counters that a previously started trace operation has registered +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcQueryTrace( + HSATraceId TraceId //IN + ); + +/** + Stops tracing operation on a previously established set of performance counters +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtPmcStopTrace( + HSATraceId TraceId //IN + ); + +/** + Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtSetTrapHandler( + HSAuint32 NodeId, //IN + void* TrapHandlerBaseAddress, //IN + HSAuint64 TrapHandlerSizeInBytes, //IN + void* TrapBufferBaseAddress, //IN + HSAuint64 TrapBufferSizeInBytes //IN + ); + +#ifdef __cplusplus +} //extern "C" +#endif + +#endif //_HSAKMT_H_ + diff --git a/hsakmt/include/hsakmttypes.h b/hsakmt/include/hsakmttypes.h new file mode 100644 index 0000000..a7e0a81 --- /dev/null +++ b/hsakmt/include/hsakmttypes.h @@ -0,0 +1,909 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HSAKMTTYPES_H_ +#define _HSAKMTTYPES_H_ + +//the definitions and THUNK API are version specific - define the version numbers here +#define HSAKMT_VERSION_MAJOR 0 +#define HSAKMT_VERSION_MINOR 99 + + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32) + + #if defined(_WIN32) + #define HSAKMTAPI __stdcall + #else + #define HSAKMTAPI + #endif + + typedef unsigned char HSAuint8; + typedef char HSAint8; + typedef unsigned short HSAuint16; + typedef signed short HSAint16; + typedef unsigned __int32 HSAuint32; + typedef signed __int64 HSAint64; + typedef unsigned __int64 HSAuint64; + +#elif defined(__linux__) + +#include <stdbool.h> +#include <stdint.h> + + #define HSAKMTAPI + + typedef uint8_t HSAuint8; + typedef int8_t HSAint8; + typedef uint16_t HSAuint16; + typedef int16_t HSAint16; + typedef uint32_t HSAuint32; + typedef int64_t HSAint64; + typedef uint64_t HSAuint64; + +#endif + +typedef void* HSA_HANDLE; +typedef HSAuint64 HSA_QUEUEID; + +// This is included in order to force the alignments to be 4 bytes so that +// it avoids extra padding added by the compiler when a 64-bit binary is generated. 
+#pragma pack(push, hsakmttypes_h, 4) + +// +// HSA STATUS codes returned by the KFD Interfaces +// + +typedef enum _HSAKMT_STATUS +{ + HSAKMT_STATUS_SUCCESS = 0, // Operation successful + HSAKMT_STATUS_ERROR = 1, // General error return if not otherwise specified + HSAKMT_STATUS_DRIVER_MISMATCH = 2, // User mode component is not compatible with kernel HSA driver + + HSAKMT_STATUS_INVALID_PARAMETER = 3, // KFD identifies input parameters invalid + HSAKMT_STATUS_INVALID_HANDLE = 4, // KFD identifies handle parameter invalid + HSAKMT_STATUS_INVALID_NODE_UNIT = 5, // KFD identifies node or unit parameter invalid + + HSAKMT_STATUS_NO_MEMORY = 6, // No memory available (when allocating queues or memory) + HSAKMT_STATUS_BUFFER_TOO_SMALL = 7, // A buffer needed to handle a request is too small + + HSAKMT_STATUS_NOT_IMPLEMENTED = 10, // KFD function is not implemented for this set of parameters + HSAKMT_STATUS_NOT_SUPPORTED = 11, // KFD function is not supported on this node + HSAKMT_STATUS_UNAVAILABLE = 12, // KFD function is not available currently on this node (but + // may be at a later time) + + HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, // KFD driver path not opened + HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR = 21, // user-kernel mode communication failure + HSAKMT_STATUS_KERNEL_ALREADY_OPENED = 22, // KFD driver path already opened + HSAKMT_STATUS_HSAMMU_UNAVAILABLE = 23, // ATS/PRI 1.1 (Address Translation Services) not available + // (IOMMU driver not installed or not-available) + + HSAKMT_STATUS_WAIT_FAILURE = 30, // The wait operation failed + HSAKMT_STATUS_WAIT_TIMEOUT = 31, // The wait operation timed out + + HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED = 35, // Memory buffer already registered + HSAKMT_STATUS_MEMORY_NOT_REGISTERED = 36, // Memory buffer not registered + HSAKMT_STATUS_MEMORY_ALIGNMENT = 37, // Memory parameter not aligned + +} HSAKMT_STATUS; + +// +// HSA KFD interface version information. 
Calling software has to validate that it meets +// the minimum interface version as described in the API specification. +// All future structures will be extended in a backward compatible fashion. +// + +typedef struct _HsaVersionInfo +{ + HSAuint32 KernelInterfaceMajorVersion; // supported kernel interface major version + HSAuint32 KernelInterfaceMinorVersion; // supported kernel interface minor version +} HsaVersionInfo; + +// +// HSA Topology Discovery Infrastructure structure definitions. +// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD +// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined +// in the "Heterogeneous System Architecture Detail Topology" specification. +// +// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output. +// When the call is made within a process context, a "snapshot" of the topology information +// is taken within the KFD to avoid any changes during the enumeration process. +// The Snapshot is released when hsaKmtReleaseSystemProperties() is called +// or when the process exits or is terminated. +// + +typedef struct _HsaSystemProperties +{ + HSAuint32 NumNodes; // the number of "H-NUMA" memory nodes. + // each node represents a discoverable node of the system + // All other enumeration is done on a per-node basis + + HSAuint32 PlatformOem; // identifies HSA platform, reflects the OEMID in the CRAT + HSAuint32 PlatformId; // HSA platform ID, reflects OEM TableID in the CRAT + HSAuint32 PlatformRev; // HSA platform revision, reflects Platform Table Revision ID +} HsaSystemProperties; + + +typedef union +{ + HSAuint32 Value; + struct + { + unsigned int HotPluggable : 1; // the node may be removed by some system action + // (event will be sent) + unsigned int HSAMMUPresent : 1; // This node has an ATS/PRI 1.1 compatible + // translation agent in the system (e.g. 
IOMMUv2) + unsigned int SharedWithGraphics : 1; // this HSA node's GPU function is also used for OS primary + // graphics render (= UI) + unsigned int QueueSizePowerOfTwo : 1; // This node GPU requires the queue size to be a power of 2 value + unsigned int QueueSize32bit : 1; // This node GPU requires the queue size to be less than 4GB + unsigned int QueueIdleEvent : 1; // This node GPU supports notification on Queue Idle + unsigned int VALimit : 1; // This node GPU has limited VA range for platform + // (typical 40bit). Affects shared VM use for 64bit apps + unsigned int WatchPointsSupported: 1; // Indicates if Watchpoints are available on the node. + unsigned int WatchPointsTotalBits: 4; // ld(Watchpoints) available. To determine the number use 2^value + + unsigned int DoorbellType : 2; // 0: This node has pre-1.0 doorbell characteristic + // 1: This node has 1.0 doorbell characteristic + // 2,3: reserved for future use + unsigned int Reserved : 18; + } ui32; +} HSA_CAPABILITY; + + +// +// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties() +// The application or runtime can use the information herein to size the topology management structures +// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number +// of throughput compute units (= SIMDs)) associated with a H-NUMA node. +// + +#define HSA_PUBLIC_NAME_SIZE 128 + +typedef struct _HsaNodeProperties +{ + HSAuint32 NumCPUCores; // # of latency (= CPU) cores present on this HSA node. + // This value is 0 for a HSA node with no such cores, + // e.g. a "discrete HSA GPU" + HSAuint32 NumFComputeCores; // # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a node. + // This value is 0 if no FCompute cores are present (e.g. pure "CPU node"). + HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node. + HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node. 
+ + HSAuint32 NumIOLinks; // # of discoverable IO link affinity properties of this node + // connecting to other nodes. + + HSAuint32 CComputeIdLo; // low value of the logical processor ID of the latency (= CPU) + // cores available on this node + HSAuint32 FComputeIdLo; // low value of the logical processor ID of the throughput (= GPU) + // units available on this node + + HSA_CAPABILITY Capability; // see above + + HSAuint32 MaxWavesPerSIMD; // This identifies the max. number of launched waves per SIMD. + // If NumFComputeCores is 0, this value is ignored. + HSAuint32 LDSSizeInKB; // Size of Local Data Store in Kilobytes per SIMD Wavefront + HSAuint32 GDSSizeInKB; // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts + + HSAuint32 WaveFrontSize; // Number of SIMD cores per wavefront executed, typically 64, + // may be 32 or a different value for some HSA based architectures + + HSAuint32 NumShaderBanks; // Number of Shader Banks or Shader Engines, typical values are 1 or 2 + + + HSAuint32 NumArrays; // Number of SIMD arrays per engine + HSAuint32 NumCUPerArray; // Number of Compute Units (CU) per SIMD array + HSAuint32 NumSIMDPerCU; // Number of SIMD representing a Compute Unit (CU) + + HSAuint32 MaxSlotsScratchCU; // Number of temp. 
memory ("scratch") wave slots available to access, + // may be 0 if HW has no restrictions + + HSAuint32 EngineId; // Identifier (rev) of the GPU uEngine or Firmware, may be 0 + + HSAuint16 VendorId; // GPU vendor id; 0 on latency (= CPU)-only nodes + HSAuint16 DeviceId; // GPU device id; 0 on latency (= CPU)-only nodes + + HSAuint32 LocationId; // GPU BDF (Bus/Device/function number) - identifies the device + // location in the overall system + HSAuint64 LocalMemSize; // Local memory size + HSAuint32 MaxEngineClockMhzFCompute; // maximum engine clocks for CPU and + HSAuint32 MaxEngineClockMhzCCompute; // GPU function, including any boost capabilities, + + HSAuint16 MarketingName[HSA_PUBLIC_NAME_SIZE]; // Public name of the "device" on the node (board or APU name). + // Unicode string +} HsaNodeProperties; + + +typedef enum _HSA_HEAPTYPE +{ + HSA_HEAPTYPE_SYSTEM = 0, + HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC = 1, // CPU "visible" part of GPU device local memory (for discrete GPU) + HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU) + // All HSA accessible memory is per definition "CPU visible" + // "Private memory" is relevant for graphics interop only. + HSA_HEAPTYPE_GPU_GDS = 3, // GPU internal memory (GDS) + HSA_HEAPTYPE_GPU_LDS = 4, // GPU internal memory (LDS) + HSA_HEAPTYPE_GPU_SCRATCH = 5, // GPU special memory (scratch) + + HSA_HEAPTYPE_NUMHEAPTYPES, + HSA_HEAPTYPE_SIZE = 0xFFFFFFFF +} HSA_HEAPTYPE; + +typedef union +{ + HSAuint32 MemoryProperty; + struct + { + unsigned int HotPluggable : 1; // the memory may be removed by some system action, + // memory should be used for temporary data + unsigned int NonVolatile : 1; // memory content is preserved across a power-off cycle. + unsigned int Reserved :30; + } ui32; +} HSA_MEMORYPROPERTY; + + +// +// Discoverable HSA Memory properties. 
+// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function +// + +typedef struct _HsaMemoryProperties +{ + HSA_HEAPTYPE HeapType; // system or frame buffer, + union + { + HSAuint64 SizeInBytes; // physical memory size of the memory range in bytes + struct + { + HSAuint32 SizeInBytesLow; // physical memory size of the memory range in bytes (lower 32bit) + HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit) + } ui32; + }; + HSA_MEMORYPROPERTY Flags; // See definitions above + + HSAuint32 Width; // memory width - the number of parallel bits of the memory interface + HSAuint32 MemoryClockMax; // memory clock for the memory, this allows computing the available bandwidth + // to the memory when needed + HSAuint64 VirtualBaseAddress; // if set to value != 0, indicates the virtual base address of the memory + // in process virtual space +} HsaMemoryProperties; + +// +// Discoverable Cache Properties. (optional). +// The structure is the output parameter of the hsaKmtGetNodeCacheProperties() function +// Any of the parameters may be 0 (= not defined) +// + +#define HSA_CPU_SIBLINGS 256 +#define HSA_PROCESSORID_ALL 0xFFFFFFFF + +typedef union +{ + HSAuint32 Value; + struct + { + unsigned int Data : 1; + unsigned int Instruction : 1; + unsigned int CPU : 1; + unsigned int HSACU : 1; + unsigned int Reserved :28; + } ui32; +} HsaCacheType; + +typedef struct _HaCacheProperties +{ + HSAuint32 ProcessorIdLow; // Identifies the processor number + + HSAuint32 CacheLevel; // Integer representing level: 1, 2, 3, 4, etc + HSAuint32 CacheSize; // Size of the cache + HSAuint32 CacheLineSize; // Cache line size in bytes + HSAuint32 CacheLinesPerTag; // Cache lines per Cache Tag + HSAuint32 CacheAssociativity; // Cache Associativity + HSAuint32 CacheLatency; // Cache latency in ns + HsaCacheType CacheType; + HSAuint32 SiblingMap[HSA_CPU_SIBLINGS]; +} HsaCacheProperties; + + +// +// Discoverable CPU Compute 
Properties. (optional). +// The structure is the output parameter of the hsaKmtGetCComputeProperties() function +// Any of the parameters may be 0 (= not defined) +// + +typedef struct _HsaCComputeProperties +{ + HSAuint32 SiblingMap[HSA_CPU_SIBLINGS]; +} HsaCComputeProperties; + +// +// Discoverable IoLink Properties (optional). +// The structure is the output parameter of the hsaKmtGetNodeIoLinkProperties() function. +// Any of the parameters may be 0 (= not defined) +// + +typedef enum _HSA_IOLINKTYPE { + HSA_IOLINKTYPE_UNDEFINED = 0, + HSA_IOLINKTYPE_HYPERTRANSPORT = 1, + HSA_IOLINKTYPE_PCIEXPRESS = 2, + HSA_IOLINKTYPE_AMBA = 3, + HSA_IOLINKTYPE_MIPI = 4, + HSA_IOLINKTYPE_OTHER = 5, + HSA_IOLINKTYPE_NUMIOLINKTYPES, + HSA_IOLINKTYPE_SIZE = 0xFFFFFFFF +} HSA_IOLINKTYPE; + +typedef union +{ + HSAuint32 LinkProperty; + struct + { + unsigned int Override : 1; // bus link properties are determined by this structure + // not by the HSA_IOLINKTYPE. The other flags are valid + // only if this bit is set to one + unsigned int NonCoherent : 1; // The link doesn't support coherent transactions + // memory accesses across must not be set to "host cacheable"! 
+ unsigned int NoAtomics32bit : 1; // The link doesn't support 32bit-wide atomic transactions + unsigned int NoAtomics64bit : 1; // The link doesn't support 64bit-wide atomic transactions + unsigned int Reserved :28; + } ui32; +} HSA_LINKPROPERTY; + + +typedef struct _HsaIoLinkProperties +{ + HSA_IOLINKTYPE IoLinkType; // see above + HSAuint32 VersionMajor; // Bus interface version (optional) + HSAuint32 VersionMinor; // Bus interface version (optional) + + HSAuint32 NodeFrom; // + HSAuint32 NodeTo; // + + HSAuint32 Weight; // weight factor (derived from CDIT) + + HSAuint32 MinimumLatency; // minimum cost of time to transfer (rounded to ns) + HSAuint32 MaximumLatency; // maximum cost of time to transfer (rounded to ns) + HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s + HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s + HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes + HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms) +} HsaIoLinkProperties; + +// +// Memory allocation definitions for the KFD HSA interface +// + +typedef struct _HsaMemFlags +{ + union + { + struct + { + unsigned int NonPaged : 1; // default = 0: pageable memory + unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE + unsigned int ReadOnly : 1; // default = 0: Read/Write memory + unsigned int PageSize : 2; // see HSA_PAGE_SIZE + unsigned int HostAccess : 1; // default = 0: GPU access only + unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on + // discrete GPU local), allocation may fall back to system memory node 0 + // memory (= always available). Otherwise no allocation is possible. + unsigned int GDSMemory : 1; // default = 0: If set, the allocation will occur in GDS heap. + // HostAccess must be 0, all other flags (except NoSubstitute) should + // be 0 when setting this entry to 1. GDS allocation may fail due to + // limited resources. 
Application code is required to work without + // any allocated GDS memory using regular memory. + // Allocation fails on any node without GPU function. + unsigned int Scratch : 1; // default = 0: If set, the allocation will occur in GPU "scratch area". + // HostAccess must be 0, all other flags (except NoSubstitute) should be 0 + // when setting this entry to 1. Scratch allocation may fail due to limited + // resources. Application code is required to work without any allocation. + // Allocation fails on any node without GPU function. + unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow + // atomic ops processing. On AMD APU, this will use the ATC path on system + // memory, irrespective of the NonPaged flag setting (= if NonPaged is set, + // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM). + // All atomic ops must be supported on this memory. + unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however + // focused on AMD discrete GPUs that support PCIe atomics; the memory + // allocation is mapped to allow for PCIe atomics to operate on system + // memory, irrespective of NonPaged set or the presence of an ATC path + // in the system. The atomic operations supported are limited to SWAP, + // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic + // increment and decrement via two's-complement arithmetic), which are the + // only atomic ops directly supported in PCI Express. + // On AMD APU, setting this flag will allocate the same type of memory + // as AtomicAccessFull, but it will be considered compatible with + // discrete GPU atomic operations access. + unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed + // for executable code (e.g. queue memory) by the host CPU or the device. 
+ // Influences the page attribute setting within the allocation + unsigned int Reserved : 19; + + } ui32; + HSAuint32 Value; + }; +} HsaMemFlags; + +typedef enum _HSA_CACHING_TYPE +{ + HSA_CACHING_CACHED = 0, + HSA_CACHING_NONCACHED = 1, + HSA_CACHING_WRITECOMBINED = 2, + HSA_CACHING_RESERVED = 3, + HSA_CACHING_NUM_CACHING, + HSA_CACHING_SIZE = 0xFFFFFFFF +} HSA_CACHING_TYPE; + +typedef enum _HSA_PAGE_SIZE +{ + HSA_PAGE_SIZE_4KB = 0, + HSA_PAGE_SIZE_64KB = 1, //64KB pages, not generally available in systems + HSA_PAGE_SIZE_2MB = 2, + HSA_PAGE_SIZE_1GB = 3, //1GB pages, not generally available in systems +} HSA_PAGE_SIZE; + + +typedef enum _HSA_DEVICE +{ + HSA_DEVICE_CPU = 0, + HSA_DEVICE_GPU = 1, + MAX_HSA_DEVICE = 2 +} HSA_DEVICE; + + +typedef enum _HSA_QUEUE_PRIORITY +{ + HSA_QUEUE_PRIORITY_MINIMUM = -3, + HSA_QUEUE_PRIORITY_LOW = -2, + HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1, + HSA_QUEUE_PRIORITY_NORMAL = 0, + HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1, + HSA_QUEUE_PRIORITY_HIGH = 2, + HSA_QUEUE_PRIORITY_MAXIMUM = 3, + HSA_QUEUE_PRIORITY_NUM_PRIORITY, + HSA_QUEUE_PRIORITY_SIZE = 0xFFFFFFFF // NOTE(review): together with the negative enumerators above, no single 32-bit integer type can hold every value of this enum - confirm the resulting enum width on the supported compilers +} HSA_QUEUE_PRIORITY; + +typedef enum _HSA_QUEUE_TYPE +{ + HSA_QUEUE_COMPUTE = 1, // AMD PM4 compatible Compute Queue + HSA_QUEUE_SDMA = 2, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc). + HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue + HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue + + // the following values indicate a queue type permitted to reference OS graphics + // resources through the interoperation API. See [5] "HSA Graphics Interoperation + // specification" for more details on use of such resources. 
+ + HSA_QUEUE_COMPUTE_OS = 11, // AMD PM4 compatible Compute Queue + HSA_QUEUE_SDMA_OS = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc). 
+ HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue + HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue + + HSA_QUEUE_COMPUTE_AQL = 21, // HSA AQL packet compatible Compute Queue + HSA_QUEUE_DMA_AQL = 22, // HSA AQL packet compatible DMA Queue + + // more types in the future + + HSA_QUEUE_TYPE_SIZE = 0xFFFFFFFF //aligns to 32bit enum +} HSA_QUEUE_TYPE; + +typedef struct _HsaQueueResource +{ + HSA_QUEUEID QueueId; /** queue ID */ + /** Doorbell address to notify HW of a new dispatch */ + union + { + HSAuint32* Queue_DoorBell; + HSAuint64* Queue_DoorBell_aql; + HSAuint64 QueueDoorBell; + }; + + /** virtual address to notify HW of queue write ptr value */ + union + { + HSAuint32* Queue_write_ptr; + HSAuint64* Queue_write_ptr_aql; + HSAuint64 QueueWptrValue; + }; + + /** virtual address updated by HW to indicate current read location */ + union + { + HSAuint32* Queue_read_ptr; + HSAuint64* Queue_read_ptr_aql; + HSAuint64 QueueRptrValue; + }; + +} HsaQueueResource; + + +//TEMPORARY structure definition - to be used only on "Trinity + Southern Islands" platform +typedef struct _HsaQueueReport +{ + HSAuint32 VMID; //Required on SI to dispatch IB in primary ring + void* QueueAddress; //virtual address of UM mapped compute ring + HSAuint64 QueueSize; //size of the UM mapped compute ring +} HsaQueueReport; + + + +typedef enum _HSA_DBG_WAVEOP +{ + HSA_DBG_WAVEOP_HALT = 1, //Halts a wavefront + HSA_DBG_WAVEOP_RESUME = 2, //Resumes a wavefront + HSA_DBG_WAVEOP_KILL = 3, //Kills a wavefront + HSA_DBG_WAVEOP_DEBUG = 4, //Causes wavefront to enter debug mode + HSA_DBG_WAVEOP_TRAP = 5, //Causes wavefront to take a trap + HSA_DBG_NUM_WAVEOP = 5, + HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF +} HSA_DBG_WAVEOP; + +typedef enum _HSA_DBG_WAVEMODE +{ + HSA_DBG_WAVEMODE_SINGLE = 0, //send command to a single wave + //Broadcast to all wavefronts of all processes is not supported for HSA user mode + HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, 
//send to waves within current process + HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, //send to waves within current process on CU + HSA_DBG_NUM_WAVEMODE = 3, + HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF +} HSA_DBG_WAVEMODE; + + +typedef enum _HSA_DBG_WAVEMSG_TYPE +{ + HSA_DBG_WAVEMSG_AUTO = 0, + HSA_DBG_WAVEMSG_USER = 1, + HSA_DBG_WAVEMSG_ERROR = 2, + HSA_DBG_NUM_WAVEMSG, + HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF +} HSA_DBG_WAVEMSG_TYPE; + +typedef enum _HSA_DBG_WATCH_MODE +{ + HSA_DBG_WATCH_READ = 0, //Read operations only + HSA_DBG_WATCH_NONREAD = 1, //Write or Atomic operations only + HSA_DBG_WATCH_ATOMIC = 2, //Atomic Operations only + HSA_DBG_WATCH_ALL = 3, //Read, Write or Atomic operations + HSA_DBG_WATCH_NUM, + HSA_DBG_WATCH_SIZE = 0xFFFFFFFF +} HSA_DBG_WATCH_MODE; + + +//This structure is hardware specific and may change in the future +typedef struct _HsaDbgWaveMsgAMDGen2 +{ + HSAuint32 Value; + HSAuint32 Reserved2; + +} HsaDbgWaveMsgAMDGen2; + +typedef union _HsaDbgWaveMessageAMD +{ + HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; + //for future HsaDbgWaveMsgAMDGen3; +} HsaDbgWaveMessageAMD; + +typedef struct _HsaDbgWaveMessage +{ + void* MemoryVA; // ptr to associated host-accessible data + HsaDbgWaveMessageAMD DbgWaveMsg; +} HsaDbgWaveMessage; + + +// +// HSA sync primitive, Event and HW Exception notification API definitions +// The API functions allow the runtime to define a so-called sync-primitive, a SW object +// combining a user-mode provided "syncvar" and a scheduler event that can be signaled +// through a defined GPU interrupt. A syncvar is a process virtual memory location of +// a certain size that can be accessed by CPU and GPU shader code within the process to set +// and query the content within that memory. The definition of the content is determined by +// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime. +// The syncvar values may be commonly written through a PM4 WRITE_DATA packet in the +// user mode instruction stream. 
+// The OS scheduler event is typically associated and signaled by an interrupt issued by +// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be +// surfaced by the KFD by this mechanism, too. +// + +// these are the new definitions for events +typedef enum _HSA_EVENTTYPE +{ + HSA_EVENTTYPE_SIGNAL = 0, //user-mode generated GPU signal + HSA_EVENTTYPE_NODECHANGE = 1, //HSA node change (attach/detach) + HSA_EVENTTYPE_DEVICESTATECHANGE = 2, //HSA device state change (start/stop) + HSA_EVENTTYPE_HW_EXCEPTION = 3, //GPU shader exception event + HSA_EVENTTYPE_SYSTEM_EVENT = 4, //GPU SYSCALL with parameter info + HSA_EVENTTYPE_DEBUG_EVENT = 5, //GPU signal for debugging + HSA_EVENTTYPE_PROFILE_EVENT = 6, //GPU signal for profiling + HSA_EVENTTYPE_QUEUE_EVENT = 7, //GPU signal queue idle state (EOP pm4) + HSA_EVENTTYPE_MEMORY = 8, //GPU signal for signaling memory access faults and memory subsystem issues + //... + HSA_EVENTTYPE_MAXID, + HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF +} HSA_EVENTTYPE; + +typedef HSAuint32 HSA_EVENTID; + +// +// Sub-definitions for various event types: Syncvar +// + +typedef struct _HsaSyncVar +{ + union + { + void* UserData; //pointer to user mode data + HSAuint64 UserDataPtrValue; //64bit compatibility of value + } SyncVar; + HSAuint64 SyncVarSize; +} HsaSyncVar; + +// +// Sub-definitions for various event types: NodeChange +// + +typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS +{ + HSA_EVENTTYPE_NODECHANGE_ADD = 0, + HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, + HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF +} HSA_EVENTTYPE_NODECHANGE_FLAGS; + +typedef struct _HsaNodeChange +{ + HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; // HSA node added/removed on the platform +} HsaNodeChange; + +// +// Sub-definitions for various event types: DeviceStateChange +// + +typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS +{ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, //device started (and available) + HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, 
//device stopped (i.e. unavailable) + HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF +} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS; + +typedef struct _HsaDeviceStateChange +{ + HSAuint32 NodeId; // F-NUMA node that contains the device + HSA_DEVICE Device; // device type: GPU or CPU + HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; // event flags +} HsaDeviceStateChange; + +// +// Sub-definitions for various event types: Memory exception +// + +typedef enum _HSA_EVENTID_MEMORYFLAGS +{ + HSA_EVENTID_MEMORY_RECOVERABLE = 0, //access fault, recoverable after page adjustment + HSA_EVENTID_MEMORY_FATAL_PROCESS = 1, //memory access requires process context destruction, unrecoverable + HSA_EVENTID_MEMORY_FATAL_VM = 2, //memory access requires all GPU VA context destruction, unrecoverable +} HSA_EVENTID_MEMORYFLAGS; + +typedef struct _HsaAccessAttributeFailure +{ + unsigned int NotPresent : 1; // Page not present or supervisor privilege + unsigned int ReadOnly : 1; // Write access to a read-only page + unsigned int NoExecute : 1; // Execute access to a page marked NX + unsigned int GpuAccess : 1; // Host access only + unsigned int ECC : 1; // ECC failure (if supported by HW) + unsigned int Reserved : 27; // must be 0 +} HsaAccessAttributeFailure; + +// data associated with HSA_EVENTTYPE_MEMORY +typedef struct _HsaMemoryAccessFault +{ + HSAuint32 NodeId; // H-NUMA node that contains the device where the memory access occurred + HSAuint64 VirtualAddress; // virtual address this occurred on + HsaAccessAttributeFailure Failure; // failure attribute + HSA_EVENTID_MEMORYFLAGS Flags; // event flags +} HsaMemoryAccessFault; + +typedef struct _HsaEventData +{ + HSA_EVENTTYPE EventType; //event type + + union + { + // return data associated with HSA_EVENTTYPE_SIGNAL and other events + HsaSyncVar SyncVar; + + // data associated with HSA_EVENTTYPE_NODECHANGE + HsaNodeChange NodeChangeState; + + // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE + HsaDeviceStateChange DeviceState; + + 
// data associated with HSA_EVENTTYPE_MEMORY + HsaMemoryAccessFault MemoryAccessFault; + + } EventData; + + // the following data entries are internal to the KFD & thunk itself. + + HSAuint64 HWData1; // internal thunk store for Event data (OsEventHandle) + HSAuint64 HWData2; // internal thunk store for Event data (HWAddress) + HSAuint32 HWData3; // internal thunk store for Event data (HWData) +} HsaEventData; + + +typedef struct _HsaEventDescriptor +{ + HSA_EVENTTYPE EventType; // event type to allocate + HSAuint32 NodeId; // H-NUMA node containing GPU device that is event source + HsaSyncVar SyncVar; // pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL +} HsaEventDescriptor; + + +typedef struct _HsaEvent +{ + HSA_EVENTID EventId; + HsaEventData EventData; +} HsaEvent; + +typedef enum _HsaEventTimeout +{ + HSA_EVENTTIMEOUT_IMMEDIATE = 0, + HSA_EVENTTIMEOUT_INFINITE = 0xFFFFFFFF +} HsaEventTimeOut; + +typedef struct _HsaClockCounters +{ + HSAuint64 GPUClockCounter; + HSAuint64 CPUClockCounter; + HSAuint64 SystemClockCounter; + HSAuint64 SystemClockFrequencyHz; +} HsaClockCounters; + +#ifndef DEFINE_GUID +typedef struct _HSA_UUID +{ + HSAuint32 Data1; + HSAuint16 Data2; + HSAuint16 Data3; + HSAuint8 Data4[8]; +} HSA_UUID; + +#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}} +#else +#define HSA_UUID GUID +#define HSA_DEFINE_UUID DEFINE_GUID +#endif + + +// GUID that identifies the GPU Shader Sequencer (SQ) block +// {B5C396B6-D310-47E4-86FC-5CC3043AF508} +HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ, +0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8); + +// GUID that identifies the GPU Memory Controller (MC) block +// {13900B57-4956-4D98-81D0-68521937F59C} +HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC, +0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c); + +// GUID that identifies the IOMMUv2 HW device 
+// {80969879-B0F6-4BE6-97F6-6A6300F5101D} +HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2, +0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d); + +// GUID that identifies the KFD +// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A} +HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER, +0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa); + +typedef enum _HSA_PROFILE_TYPE +{ + HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only) + HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously + //writes to memory on updates (KFD access only) + HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE = 2, //user-queue accessible counter + HSA_PROFILE_TYPE_NONPRIV_STREAMING = 3, //user-queue accessible counter + //... + HSA_PROFILE_TYPE_NUM, + + HSA_PROFILE_TYPE_SIZE = 0xFFFFFFFF // In order to align to 32-bit value +} HSA_PROFILE_TYPE; + + +typedef struct _HsaCounterFlags +{ + union + { + struct + { + unsigned int Global : 1; // counter is global + // (not tied to VMID/WAVE/CU, ...) + unsigned int Resettable : 1; // counter can be reset by SW + // (always to 0?) 
+ unsigned int ReadOnly : 1; // counter is read-only + // (but may be reset, if indicated) + unsigned int Stream : 1; // counter has streaming capability + // (after trigger, updates buffer) + unsigned int Reserved : 28; + } ui32; + HSAuint32 Value; + }; +} HsaCounterFlags; + + +typedef struct _HsaCounter +{ + HSA_PROFILE_TYPE Type; // specifies the counter type + HSAuint64 CounterId; // indicates counter register offset + HSAuint32 CounterSizeInBits; // indicates relevant counter bits + HSAuint64 CounterMask; // bitmask for counter value (if applicable) + HsaCounterFlags Flags; // Property flags (see above) + HSAuint32 BlockIndex; // identifies block the counter belongs to, + // value may be 0 to NumBlocks +} HsaCounter; + + +typedef struct _HsaCounterBlockProperties +{ + HSA_UUID BlockId; // specifies the block location + HSAuint32 NumCounters; // How many counters are available? + // (sizes Counters[] array below) + HSAuint32 NumConcurrent; // How many counter slots are available + // in block? + HsaCounter Counters[1]; // Start of counter array + // (NumCounters elements total) +} HsaCounterBlockProperties; + + +typedef struct _HsaCounterProperties +{ + HSAuint32 NumBlocks; // How many profilable blocks are available? + // (sizes Blocks[] array below) + HSAuint32 NumConcurrent; // How many block slots can be queried + // concurrently by HW? 
+ HsaCounterBlockProperties Blocks[1]; // Start of block array + // (NumBlocks elements total) +} HsaCounterProperties; + +typedef HSAuint64 HSATraceId; + +typedef struct _HsaPmcTraceRoot +{ + HSAuint64 TraceBufferMinSizeBytes;// (page aligned) + HSAuint32 NumberOfPasses; + HSATraceId TraceId; +} HsaPmcTraceRoot; + +#pragma pack(pop, hsakmttypes_h) + + +#ifdef __cplusplus +} //extern "C" +#endif + +#endif //_HSAKMTTYPES_H_ diff --git a/hsakmt/include/linux/kfd_ioctl.h b/hsakmt/include/linux/kfd_ioctl.h new file mode 100644 index 0000000..d683342 --- /dev/null +++ b/hsakmt/include/linux/kfd_ioctl.h @@ -0,0 +1,292 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef KFD_IOCTL_H_INCLUDED +#define KFD_IOCTL_H_INCLUDED + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* NOTE(review): the structs below use uint32_t/uint64_t, which user-space code normally gets from <stdint.h> - confirm that header is in scope wherever this file is included */ + +#define KFD_IOCTL_MAJOR_VERSION 1 +#define KFD_IOCTL_MINOR_VERSION 1 + +struct kfd_ioctl_get_version_args { + uint32_t major_version; /* from KFD */ + uint32_t minor_version; /* from KFD */ +}; + +/* For kfd_ioctl_create_queue_args.queue_type. */ +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0 +#define KFD_IOC_QUEUE_TYPE_SDMA 1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 + +#define KFD_MAX_QUEUE_PERCENTAGE 100 +#define KFD_MAX_QUEUE_PRIORITY 15 + +struct kfd_ioctl_create_queue_args { + uint64_t ring_base_address; /* to KFD */ + uint64_t write_pointer_address; /* from KFD */ + uint64_t read_pointer_address; /* from KFD */ + uint64_t doorbell_offset; /* from KFD */ + + uint32_t ring_size; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t queue_type; /* to KFD */ + uint32_t queue_percentage; /* to KFD */ + uint32_t queue_priority; /* to KFD */ + uint32_t queue_id; /* from KFD */ + + uint64_t eop_buffer_address; /* to KFD */ + uint64_t eop_buffer_size; /* to KFD */ + uint64_t ctx_save_restore_address; /* to KFD */ + uint64_t ctx_save_restore_size; /* to KFD */ +}; + +struct kfd_ioctl_destroy_queue_args { + uint32_t queue_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_update_queue_args { + uint64_t ring_base_address; /* to KFD */ + + uint32_t queue_id; /* to KFD */ + uint32_t ring_size; /* to KFD */ + uint32_t queue_percentage; /* to KFD */ + uint32_t queue_priority; /* to KFD */ +}; + +/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ +#define KFD_IOC_CACHE_POLICY_COHERENT 0 +#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 + +struct kfd_ioctl_set_memory_policy_args { + uint64_t alternate_aperture_base; /* to KFD */ + uint64_t alternate_aperture_size; /* to KFD */ + + uint32_t gpu_id; /* to KFD */ + uint32_t default_policy; /* to KFD */ + uint32_t alternate_policy; /* to KFD */ + uint32_t pad; +}; + +/* + * All counters 
are monotonic. They are used for profiling of compute jobs. + * The profiling is done by userspace. + * + * In case of GPU reset, the counter should not be affected. + */ + +struct kfd_ioctl_get_clock_counters_args { + uint64_t gpu_clock_counter; /* from KFD */ + uint64_t cpu_clock_counter; /* from KFD */ + uint64_t system_clock_counter; /* from KFD */ + uint64_t system_clock_freq; /* from KFD */ + + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +#define NUM_OF_SUPPORTED_GPUS 7 + +struct kfd_process_device_apertures { + uint64_t lds_base; /* from KFD */ + uint64_t lds_limit; /* from KFD */ + uint64_t scratch_base; /* from KFD */ + uint64_t scratch_limit; /* from KFD */ + uint64_t gpuvm_base; /* from KFD */ + uint64_t gpuvm_limit; /* from KFD */ + uint32_t gpu_id; /* from KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_get_process_apertures_args { + struct kfd_process_device_apertures + process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ + + /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ + uint32_t num_of_nodes; + uint32_t pad; +}; + +#define MAX_ALLOWED_NUM_POINTS 100 +#define MAX_ALLOWED_AW_BUFF_SIZE 4096 +#define MAX_ALLOWED_WAC_BUFF_SIZE 128 + +struct kfd_ioctl_dbg_register_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_unregister_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_address_watch_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /* including gpu_id and buf_size */ +}; + +struct kfd_ioctl_dbg_wave_control_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /* including gpu_id and buf_size */ +}; + +/* Matching HSA_EVENTTYPE */ +#define KFD_IOC_EVENT_SIGNAL 0 +#define KFD_IOC_EVENT_NODECHANGE 1 +#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 +#define KFD_IOC_EVENT_HW_EXCEPTION 3 +#define 
KFD_IOC_EVENT_SYSTEM_EVENT 4 +#define KFD_IOC_EVENT_DEBUG_EVENT 5 +#define KFD_IOC_EVENT_PROFILE_EVENT 6 +#define KFD_IOC_EVENT_QUEUE_EVENT 7 +#define KFD_IOC_EVENT_MEMORY 8 + +#define KFD_IOC_WAIT_RESULT_COMPLETE 0 +#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 +#define KFD_IOC_WAIT_RESULT_FAIL 2 + +#define KFD_SIGNAL_EVENT_LIMIT 256 + +struct kfd_ioctl_create_event_args { + uint64_t event_page_offset; /* from KFD */ + uint32_t event_trigger_data; /* from KFD - signal events only */ + uint32_t event_type; /* to KFD */ + uint32_t auto_reset; /* to KFD */ + uint32_t node_id; /* to KFD - only valid for certain + event types */ + uint32_t event_id; /* from KFD */ + uint32_t event_slot_index; /* from KFD */ +}; + +struct kfd_ioctl_destroy_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_set_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_reset_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_memory_exception_failure { + uint32_t NotPresent; /* Page not present or supervisor privilege */ + uint32_t ReadOnly; /* Write access to a read-only page */ + uint32_t NoExecute; /* Execute access to a page marked NX */ + uint32_t pad; +}; + +/* memory exception data */ +struct kfd_hsa_memory_exception_data { + struct kfd_memory_exception_failure failure; + uint64_t va; + uint32_t gpu_id; + uint32_t pad; +}; + +/* Event data */ +struct kfd_event_data { + union { + struct kfd_hsa_memory_exception_data memory_exception_data; + }; /* From KFD */ + uint64_t kfd_event_data_ext; /* pointer to an extension structure + for future exception types */ + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_wait_events_args { + uint64_t events_ptr; /* pointer to struct + kfd_event_data array, to KFD */ + uint32_t num_events; /* to KFD */ + uint32_t wait_for_all; /* to KFD */ + uint32_t timeout; /* to KFD */ + uint32_t wait_result; /* from KFD */ +}; + +#define AMDKFD_IOCTL_BASE 
'K' +#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) +#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) + +#define AMDKFD_IOC_GET_VERSION \ + AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) + +#define AMDKFD_IOC_CREATE_QUEUE \ + AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) + +#define AMDKFD_IOC_DESTROY_QUEUE \ + AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) + +#define AMDKFD_IOC_SET_MEMORY_POLICY \ + AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) + +#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ + AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) + +#define AMDKFD_IOC_GET_PROCESS_APERTURES \ + AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) + +#define AMDKFD_IOC_UPDATE_QUEUE \ + AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) + +#define AMDKFD_IOC_CREATE_EVENT \ + AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) + +#define AMDKFD_IOC_DESTROY_EVENT \ + AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) + +#define AMDKFD_IOC_SET_EVENT \ + AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) + +#define AMDKFD_IOC_RESET_EVENT \ + AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) + +#define AMDKFD_IOC_WAIT_EVENTS \ + AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) + +#define AMDKFD_IOC_DBG_REGISTER \ + AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) + +#define AMDKFD_IOC_DBG_UNREGISTER \ + AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) + +#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ + AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) + +#define AMDKFD_IOC_DBG_WAVE_CONTROL \ + AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) + +#define AMDKFD_COMMAND_START 0x01 /* lowest ioctl number used above */ +#define AMDKFD_COMMAND_END 0x11 /* one past the highest ioctl number used above (0x10) */ + +#endif |