Cycles: Handling SDK/ROCm 6+ lack of backward compatibility with pre ROCm 6

This commit introduces proper handling of ROCm 5 and ROCm 6 runtimes on
Linux, based on the version of the ROCm compiler used at build time.
Previously, HIPEW (the HIP equivalent of Cuda Wrangler) defaulted to
loading the ROCm 5 runtime. If ROCm 5 was unavailable, it would attempt
to load ROCm 6. However, ROCm 6 introduces changes in certain
structures and functions that are not backward compatible, leading to
potential issues when kernels compiled with the ROCm 6 compiler are
executed on the ROCm 5 runtime.

### Summary of Changes:

**Separation of Structures and Functions:**
Structures and functions are now separated into hipew5 and hipew6 to
accommodate the differences between ROCm versions.

**Build-Time Version Detection:**
The ROCm version is determined during build time, and the corresponding
hipew5 or hipew6 is included accordingly.

**Runtime Default to ROCm 6:**
By default, HIPEW now loads the ROCm 6 runtime and
includes hipew6 (Linux only).

**JIT Compilation Behavior:**
Since ROCm 6 is the default version, JIT compilation is supported only
when the ROCm 6 compiler is detected at runtime.

**HIP-RT Update:**
HIP-RT has been updated to load the ROCm 6 runtime by default.

These changes ensure compatibility and stability when switching
between ROCm versions, avoiding issues caused by runtime
and compiler mismatches.

Co-authored-by: Alaska <alaskayou01@gmail.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>
Pull Request: https://projects.blender.org/blender/blender/pulls/130153
This commit is contained in:
salipourto 2024-12-17 16:19:36 +01:00 committed by Sergey Sharybin
parent cd13f52e75
commit 4e5a9c5dfb
10 changed files with 901 additions and 410 deletions

View file

@ -14,9 +14,20 @@ set(HIPRT_EXTRA_ARGS
-DBITCODE=ON -DBITCODE=ON
-DGENERATE_BAKE_KERNEL=OFF -DGENERATE_BAKE_KERNEL=OFF
-DNO_UNITTEST=ON -DNO_UNITTEST=ON
-DHIPRT_PREFER_HIP_5=ON
) )
if(WIN32)
# Windows is currently defaulting to HIP 5 for the buildbot and the
# dependency build environment.
list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=ON)
else()
# The Linux uses HIP 6 by default in those environments, but it had
# -DHIPRT_PREFER_HIP_5=ON passed to the dependency builder in its
# initial implementation. Force it to off so that incremental build
# in the existing build environment does the right thing.
list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=OFF)
endif()
set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt) set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt)
set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build) set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build)

View file

@ -14,6 +14,8 @@ set(INC_SYS
set(SRC set(SRC
src/hipew.c src/hipew.c
include/hipew.h include/hipew.h
include/hipew6.h
include/hipew5.h
) )
set(LIB set(LIB
@ -32,4 +34,10 @@ if(HIPRT_INCLUDE_DIR)
) )
endif() endif()
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
find_package(HIP 5.5.0)
if(HIP_VERSION_MAJOR EQUAL 5)
target_compile_definitions(extern_hipew PUBLIC WITH_HIP_SDK_5)
endif()

View file

@ -23,6 +23,7 @@ extern "C" {
#include <stdlib.h> #include <stdlib.h>
#define HIP_IPC_HANDLE_SIZE 64 #define HIP_IPC_HANDLE_SIZE 64
#define hipHostMallocDefault 0x00 #define hipHostMallocDefault 0x00
#define hipHostMallocPortable 0x01 #define hipHostMallocPortable 0x01
@ -116,13 +117,6 @@ typedef struct HIPuuid_st {
char bytes[16]; char bytes[16];
} HIPuuid; } HIPuuid;
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0,
hipMemcpyHostToDevice = 1,
hipMemcpyDeviceToHost = 2,
hipMemcpyDeviceToDevice = 3,
hipMemcpyDefault = 4
} hipMemcpyKind;
typedef enum hipChannelFormatKind { typedef enum hipChannelFormatKind {
hipChannelFormatKindSigned = 0, hipChannelFormatKindSigned = 0,
@ -167,7 +161,7 @@ typedef enum hipTextureAddressMode {
*/ */
typedef struct textureReference { typedef struct textureReference {
int normalized; int normalized;
//enum hipTextureReadMode readMode;// used only for driver API's //enum hipTextureReadMode readMode;// used only for driver API's //why is this commentend out?
enum hipTextureFilterMode filterMode; enum hipTextureFilterMode filterMode;
enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
struct hipChannelFormatDesc channelDesc; struct hipChannelFormatDesc channelDesc;
@ -185,39 +179,6 @@ typedef struct textureReference {
typedef textureReference* hipTexRef; typedef textureReference* hipTexRef;
/**
* ROCm 6 and ROCm 5 memory types are different.
* For now, we include both in the enum and then use the get_hip_memory_type
* Function to convert. When removing ROCm 5 compatibility this can be simplified.
*/
typedef enum hipMemoryType {
hipMemoryTypeHost_v5 = 0x00,
hipMemoryTypeDevice_v5 = 0x01,
hipMemoryTypeArray_v5 = 0x02,
hipMemoryTypeUnified_v5 = 0x03,
hipMemoryTypeUnregistered = 0,
hipMemoryTypeHost = 1,
hipMemoryTypeDevice = 2,
hipMemoryTypeManaged = 3,
hipMemoryTypeArray = 10,
hipMemoryTypeUnified = 11,
} hipMemoryType;
hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version);
/**
* Pointer attributes
*/
typedef struct hipPointerAttribute_t {
enum hipMemoryType memoryType;
int device;
void* devicePointer;
void* hostPointer;
int isManaged;
unsigned allocationFlags; /* flags specified when memory was allocated*/
/* peers? */
} hipPointerAttribute_t;
typedef struct ihipIpcEventHandle_t { typedef struct ihipIpcEventHandle_t {
char reserved[HIP_IPC_HANDLE_SIZE]; char reserved[HIP_IPC_HANDLE_SIZE];
} ihipIpcEventHandle_t; } ihipIpcEventHandle_t;
@ -314,128 +275,6 @@ typedef enum HIPoccupancy_flags_enum {
HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1, HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
} HIPoccupancy_flags; } HIPoccupancy_flags;
typedef enum hipDeviceAttribute_t {
hipDeviceAttributeCudaCompatibleBegin = 0,
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number.
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
///< at the same virtual address as the CPU
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption.
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel.
///< Deprecated. Use instead asyncEngineCount.
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
///< the device without migration
hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1
hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture.
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture.
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture.
hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture.
hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture
hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture.
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeName, ///< Device name.
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
///< by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in.
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor.
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities.
hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte.
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeCudaCompatibleEnd = 9999,
hipDeviceAttributeAmdSpecificBegin = 10000,
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
hipDeviceAttributeArch, ///< Device architecture
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
hipDeviceAttributeGcnArch, ///< Device gcn architecture
hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
///< devices with unmatched functions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
///< devices with unmatched grid dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
///< devices with unmatched block dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
///< devices with unmatched shared memories
hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
///< hipStreamWaitValue64() , '0' otherwise.
hipDeviceAttributeAmdSpecificEnd = 19999,
hipDeviceAttributeVendorSpecificBegin = 20000,
hipDeviceAttribute
// Extended attributes for vendors
} hipDeviceAttribute_t;
typedef struct HIPdevprop_st { typedef struct HIPdevprop_st {
int maxThreadsPerBlock; int maxThreadsPerBlock;
@ -481,84 +320,6 @@ typedef struct {
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
} hipDeviceArch_t; } hipDeviceArch_t;
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int multiProcessorCount; ///< Number of multi-processors (compute units).
int l2CacheSize; ///< L2 cache size.
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int computeMode; ///< Compute mode.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
///< instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int pciDomainID; ///< PCI Domain ID
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
int canMapHostMemory; ///< Check whether HIP can map host memory
int gcnArch; ///< DEPRECATED: use gcnArchName instead
char gcnArchName[256]; ///< AMD GCN Arch Name.
int integrated; ///< APU vs dGPU
int cooperativeLaunch; ///< HIP device supports cooperative launch
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
int maxTexture1D; ///< Maximum number of elements in 1D images
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
size_t textureAlignment; ///<Alignment requirement for textures
size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
int ECCEnabled; ///<Device has ECC support enabled
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
///devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
///devices with unmatched shared memories
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
int asicRevision; ///< Revision of the GPU in this device
int managedMemory; ///< Device supports allocating managed memory on this system
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
} hipDeviceProp_t;
typedef enum HIPpointer_attribute_enum {
HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
} HIPpointer_attribute;
typedef enum hipFunction_attribute { typedef enum hipFunction_attribute {
HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
@ -593,14 +354,6 @@ typedef enum HIPshared_carveout_enum {
HIP_SHAREDMEM_CARVEOUT_MAX_L1 = 0, HIP_SHAREDMEM_CARVEOUT_MAX_L1 = 0,
} HIPshared_carveout; } HIPshared_carveout;
typedef enum hipComputeMode {
hipComputeModeDefault = 0,
hipComputeModeProhibited = 2,
hipComputeModeExclusiveProcess = 3,
} hipComputeMode;
typedef enum HIPmem_advise_enum { typedef enum HIPmem_advise_enum {
HIP_MEM_ADVISE_SET_READ_MOSTLY = 1, HIP_MEM_ADVISE_SET_READ_MOSTLY = 1,
HIP_MEM_ADVISE_UNSET_READ_MOSTLY = 2, HIP_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
@ -802,25 +555,6 @@ typedef enum HIPdevice_P2PAttribute_enum {
typedef struct hipGraphicsResource_st* hipGraphicsResource; typedef struct hipGraphicsResource_st* hipGraphicsResource;
typedef struct hip_Memcpy2D {
size_t srcXInBytes;
size_t srcY;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray * srcArray;
size_t srcPitch;
size_t dstXInBytes;
size_t dstY;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray * dstArray;
size_t dstPitch;
size_t WidthInBytes;
size_t Height;
} hip_Memcpy2D;
typedef enum hipDeviceP2PAttr { typedef enum hipDeviceP2PAttr {
hipDevP2PAttrPerformanceRank = 0, hipDevP2PAttrPerformanceRank = 0,
hipDevP2PAttrAccessSupported, hipDevP2PAttrAccessSupported,
@ -828,60 +562,6 @@ typedef enum hipDeviceP2PAttr {
hipDevP2PAttrHipArrayAccessSupported hipDevP2PAttrHipArrayAccessSupported
} hipDeviceP2PAttr; } hipDeviceP2PAttr;
typedef struct HIP_MEMCPY3D {
unsigned int srcXInBytes;
unsigned int srcY;
unsigned int srcZ;
unsigned int srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray srcArray;
unsigned int srcPitch;
unsigned int srcHeight;
unsigned int dstXInBytes;
unsigned int dstY;
unsigned int dstZ;
unsigned int dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray dstArray;
unsigned int dstPitch;
unsigned int dstHeight;
unsigned int WidthInBytes;
unsigned int Height;
unsigned int Depth;
} HIP_MEMCPY3D;
typedef struct HIP_MEMCPY3D_PEER_st {
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
size_t srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray * srcArray;
hipCtx_t srcContext;
size_t srcPitch;
size_t srcHeight;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
size_t dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray * dstArray;
hipCtx_t dstContext;
size_t dstPitch;
size_t dstHeight;
size_t WidthInBytes;
size_t Height;
size_t Depth;
} HIP_MEMCPY3D_PEER;
typedef struct HIP_ARRAY_DESCRIPTOR { typedef struct HIP_ARRAY_DESCRIPTOR {
size_t Width; size_t Width;
size_t Height; size_t Height;
@ -1051,12 +731,6 @@ typedef struct HIP_RESOURCE_VIEW_DESC_st {
unsigned int reserved[16]; unsigned int reserved[16];
} HIP_RESOURCE_VIEW_DESC; } HIP_RESOURCE_VIEW_DESC;
typedef struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st {
unsigned long long p2pToken;
unsigned int vaSpaceToken;
} HIP_POINTER_ATTRIBUTE_P2P_TOKENS;
typedef unsigned int GLenum; typedef unsigned int GLenum;
typedef unsigned int GLuint; typedef unsigned int GLuint;
typedef int GLint; typedef int GLint;
@ -1073,34 +747,6 @@ typedef enum HIPGLmap_flags_enum {
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} HIPGLmap_flags; } HIPGLmap_flags;
typedef enum hipExternalMemoryHandleType_enum {
hipExternalMemoryHandleTypeOpaqueFd = 1,
hipExternalMemoryHandleTypeOpaqueWin32 = 2,
hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
hipExternalMemoryHandleTypeD3D12Heap = 4,
hipExternalMemoryHandleTypeD3D12Resource = 5,
hipExternalMemoryHandleTypeD3D11Resource = 6,
hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
} hipExternalMemoryHandleType;
typedef struct hipExternalMemoryHandleDesc_st {
hipExternalMemoryHandleType type;
union {
int fd;
struct {
void *handle;
const void *name;
} win32;
} handle;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryHandleDesc;
typedef struct hipExternalMemoryBufferDesc_st {
unsigned long long offset;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryBufferDesc;
/** /**
* hipRTC related * hipRTC related
@ -1166,6 +812,72 @@ typedef enum hiprtcJITInputType {
HIPRTC_JIT_NUM_INPUT_TYPES = ( HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES + 3 ) HIPRTC_JIT_NUM_INPUT_TYPES = ( HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES + 3 )
} hiprtcJITInputType; } hiprtcJITInputType;
#ifdef WITH_HIP_SDK_5
#include "hipew5.h"
#else
#include "hipew6.h"
#endif
/**
* Pointer attributes
*/
typedef struct hipPointerAttribute_t {
enum hipMemoryType memoryType;
int device;
void* devicePointer;
void* hostPointer;
int isManaged;
unsigned allocationFlags; /* flags specified when memory was allocated*/
/* peers? */
} hipPointerAttribute_t;
typedef struct hip_Memcpy2D {
size_t srcXInBytes;
size_t srcY;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray * srcArray;
size_t srcPitch;
size_t dstXInBytes;
size_t dstY;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray * dstArray;
size_t dstPitch;
size_t WidthInBytes;
size_t Height;
} hip_Memcpy2D;
typedef struct HIP_MEMCPY3D_PEER_st {
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
size_t srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray * srcArray;
hipCtx_t srcContext;
size_t srcPitch;
size_t srcHeight;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
size_t dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray * dstArray;
hipCtx_t dstContext;
size_t dstPitch;
size_t dstHeight;
size_t WidthInBytes;
size_t Height;
size_t Depth;
} HIP_MEMCPY3D_PEER;
typedef struct ihiprtcLinkState* hiprtcLinkState; typedef struct ihiprtcLinkState* hiprtcLinkState;
/* Function types. */ /* Function types. */
@ -1177,7 +889,6 @@ typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
typedef hipError_t HIPAPI thipRuntimeGetVersion(int* runtimeVersion); typedef hipError_t HIPAPI thipRuntimeGetVersion(int* runtimeVersion);
typedef hipError_t HIPAPI thipGetDevice(int* device); typedef hipError_t HIPAPI thipGetDevice(int* device);
typedef hipError_t HIPAPI thipGetDeviceCount(int* count); typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal); typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
@ -1328,7 +1039,6 @@ extern thipDriverGetVersion *hipDriverGetVersion;
extern thipRuntimeGetVersion *hipRuntimeGetVersion; extern thipRuntimeGetVersion *hipRuntimeGetVersion;
extern thipGetDevice *hipGetDevice; extern thipGetDevice *hipGetDevice;
extern thipGetDeviceCount *hipGetDeviceCount; extern thipGetDeviceCount *hipGetDeviceCount;
extern thipGetDeviceProperties *hipGetDeviceProperties;
extern thipDeviceGet *hipDeviceGet; extern thipDeviceGet *hipDeviceGet;
extern thipDeviceGetName *hipDeviceGetName; extern thipDeviceGetName *hipDeviceGetName;
extern thipDeviceGetAttribute *hipDeviceGetAttribute; extern thipDeviceGetAttribute *hipDeviceGetAttribute;

313
extern/hipew/include/hipew5.h vendored Normal file
View file

@ -0,0 +1,313 @@
/*
* Copyright 2011-2024 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifndef __HIPEW5_H__
#define __HIPEW5_H__
#define WIN_DRIVER "amdhip64.dll"
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0,
hipMemcpyHostToDevice = 1,
hipMemcpyDeviceToHost = 2,
hipMemcpyDeviceToDevice = 3,
hipMemcpyDefault = 4
} hipMemcpyKind;
typedef enum hipMemoryType {
hipMemoryTypeHost = 0,
hipMemoryTypeDevice = 1,
hipMemoryTypeArray = 2,
hipMemoryTypeUnified = 3,
hipMemoryTypeManaged = 4
} hipMemoryType;
typedef struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st {
unsigned long long p2pToken;
unsigned int vaSpaceToken;
} HIP_POINTER_ATTRIBUTE_P2P_TOKENS;
typedef enum hipDeviceAttribute_t {
hipDeviceAttributeCudaCompatibleBegin = 0,
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number.
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
///< at the same virtual address as the CPU
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption.
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel.
///< Deprecated. Use instead asyncEngineCount.
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
///< the device without migration
hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1
hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture.
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture.
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture.
hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture.
hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture
hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture.
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeName, ///< Device name.
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
///< by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in.
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor.
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities.
hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte.
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeCudaCompatibleEnd = 9999,
hipDeviceAttributeAmdSpecificBegin = 10000,
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
hipDeviceAttributeArch, ///< Device architecture
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
hipDeviceAttributeGcnArch, ///< Device gcn architecture
hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
///< devices with unmatched functions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
///< devices with unmatched grid dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
///< devices with unmatched block dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
///< devices with unmatched shared memories
hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
///< hipStreamWaitValue64() , '0' otherwise.
hipDeviceAttributeAmdSpecificEnd = 19999,
hipDeviceAttributeVendorSpecificBegin = 20000,
hipDeviceAttribute
// Extended attributes for vendors
} hipDeviceAttribute_t;
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int multiProcessorCount; ///< Number of multi-processors (compute units).
int l2CacheSize; ///< L2 cache size.
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int computeMode; ///< Compute mode.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
///< instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int pciDomainID; ///< PCI Domain ID
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
int canMapHostMemory; ///< Check whether HIP can map host memory
int gcnArch; ///< DEPRECATED: use gcnArchName instead
char gcnArchName[256]; ///< AMD GCN Arch Name.
int integrated; ///< APU vs dGPU
int cooperativeLaunch; ///< HIP device supports cooperative launch
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
int maxTexture1D; ///< Maximum number of elements in 1D images
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
size_t textureAlignment; ///<Alignment requirement for textures
size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
int ECCEnabled; ///<Device has ECC support enabled
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
///devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
///devices with unmatched shared memories
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
int asicRevision; ///< Revision of the GPU in this device
int managedMemory; ///< Device supports allocating managed memory on this system
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
} hipDeviceProp_t;
typedef enum HIPpointer_attribute_enum {
HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
} HIPpointer_attribute;
typedef enum hipComputeMode {
hipComputeModeDefault = 0,
hipComputeModeProhibited = 2,
hipComputeModeExclusiveProcess = 3,
} hipComputeMode;
typedef struct HIP_MEMCPY3D {
unsigned int srcXInBytes;
unsigned int srcY;
unsigned int srcZ;
unsigned int srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray srcArray;
unsigned int srcPitch;
unsigned int srcHeight;
unsigned int dstXInBytes;
unsigned int dstY;
unsigned int dstZ;
unsigned int dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray dstArray;
unsigned int dstPitch;
unsigned int dstHeight;
unsigned int WidthInBytes;
unsigned int Height;
unsigned int Depth;
} HIP_MEMCPY3D;
typedef enum hipExternalMemoryHandleType_enum {
hipExternalMemoryHandleTypeOpaqueFd = 1,
hipExternalMemoryHandleTypeOpaqueWin32 = 2,
hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
hipExternalMemoryHandleTypeD3D12Heap = 4,
hipExternalMemoryHandleTypeD3D12Resource = 5,
hipExternalMemoryHandleTypeD3D11Resource = 6,
hipExternalMemoryHandleTypeD3D11ResourceKmt = 7
} hipExternalMemoryHandleType;
typedef struct hipExternalMemoryHandleDesc_st {
hipExternalMemoryHandleType type;
union {
int fd;
struct {
void *handle;
const void *name;
} win32;
} handle;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryHandleDesc;
typedef struct hipExternalMemoryBufferDesc_st {
unsigned long long offset;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryBufferDesc;
typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
extern thipGetDeviceProperties *hipGetDeviceProperties;
#endif /* __HIPEW_H__ */

462
extern/hipew/include/hipew6.h vendored Normal file
View file

@ -0,0 +1,462 @@
/*
* Copyright 2011-2024 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifndef __HIPEW6_H__
#define __HIPEW6_H__
#define WIN_DRIVER "amdhip64_6.dll"
#define hipIpcMemLazyEnablePeerAccess 0x01
#define hipMemAttachGlobal 0x01
#define hipMemAttachHost 0x02
#define hipDeviceScheduleAuto 0x0
#define hipDeviceScheduleSpin 0x1
#define hipDeviceScheduleYield 0x2
#define hipDeviceScheduleBlockingSync 0x4
#define hipDeviceScheduleMask 0x7
#define hipDeviceMapHost 0x8
#define hipDeviceLmemResizeToMax 0x10
#define hipStreamDefault 0x00
#define hipStreamNonBlocking 0x01
#define hipEventDefault 0x0
#define hipEventBlockingSync 0x1
#define hipEventDisableTiming 0x2
#define hipEventInterprocess 0x4
#define hipOccupancyDefault 0x00
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0,
hipMemcpyHostToDevice = 1,
hipMemcpyDeviceToHost = 2,
hipMemcpyDeviceToDevice = 3,
hipMemcpyDefault = 4,
hipMemcpyDeviceToDeviceNoCU = 1024
} hipMemcpyKind;
typedef enum hipMemoryType {
hipMemoryTypeUnregistered = 0,
hipMemoryTypeHost = 1,
hipMemoryTypeDevice = 2,
hipMemoryTypeManaged = 3,
hipMemoryTypeArray = 10,
hipMemoryTypeUnified = 11
} hipMemoryType;
typedef enum hipDeviceAttribute_t {
hipDeviceAttributeCudaCompatibleBegin = 0,
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number.
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
///< at the same virtual address as the CPU
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption.
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel.
///< Deprecated. Use instead asyncEngineCount.
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
///< the device without migration
hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1
hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture.
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture.
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture.
hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture.
hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture
hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture.
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
///< by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in.
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor.
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities.
hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator
hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via hipHostRegister
hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream Ordered Memory Allocator
hipDeviceAttributeCudaCompatibleEnd = 9999,
hipDeviceAttributeAmdSpecificBegin = 10000,
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch
hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
///< devices with unmatched functions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
///< devices with unmatched grid dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
///< devices with unmatched block dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
///< devices with unmatched shared memories
hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
///< hipStreamWaitValue64() , '0' otherwise.
hipDeviceAttributeAmdSpecificEnd = 19999,
hipDeviceAttributeVendorSpecificBegin = 20000,
hipDeviceAttribute
// Extended attributes for vendors
} hipDeviceAttribute_t;
typedef struct hipUUID_t {
char bytes[16];
} hipUUID;
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
hipUUID uuid; ///< UUID of a device
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
unsigned int luidDeviceNodeMask; ///< LUID node mask
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
///< pitched memory
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
size_t totalConstMem; ///< Size of shared constant memory region on the device
///< (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
size_t textureAlignment; ///< Alignment requirement for textures
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead
int multiProcessorCount; ///< Number of multi-processors (compute units).
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
int integrated; ///< APU vs dGPU
int canMapHostMemory; ///< Check whether HIP can map host memory
int computeMode; ///< Compute mode.
int maxTexture1D; ///< Maximum number of elements in 1D images
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
///< elements
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims
int maxTextureCubemap; ///< Maximum cubemap texture dims
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims
int maxSurface1D; ///< Maximum 1D surface size
int maxSurface2D[2]; ///< Maximum 2D surface size
int maxSurface3D[3]; ///< Maximum 3D surface size
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size
int maxSurface2DLayered[3]; ///< Maximum 2D layared surface size
int maxSurfaceCubemap; ///< Maximum cubemap surface size
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size
size_t surfaceAlignment; ///< Alignment requirement for surface
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int ECCEnabled; ///< Device has ECC support enabled
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
int pciDomainID; ///< PCI Domain ID
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int asyncEngineCount; ///< Number of async engines
int unifiedAddressing; ///< Does device and host share unified address space
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
int l2CacheSize; ///< L2 cache size.
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int streamPrioritiesSupported; ///< Device supports stream priority
int globalL1CacheSupported; ///< Indicates globals are cached in L1
int localL1CacheSupported; ///< Locals are cahced in L1
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor.
int regsPerMultiprocessor; ///< registers available per multiprocessor
int managedMemory; ///< Device supports allocating managed memory on this system
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
///< the CPU
int computePreemptionSupported; ///< Is compute preemption supported on the device
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same
///< address as the host
int cooperativeLaunch; ///< HIP device supports cooperative launch
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
///< devices
size_t
sharedMemPerBlockOptin; ///< Per device m ax shared mem per block usable by special opt in
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
///< page tables
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
///< without migration
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU
int accessPolicyMaxWindowSize; ///< Max value of access policy window
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block
int hostRegisterSupported; ///< Device supports hipHostRegister
int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays
int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag
///< with hipHostRegistger
int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support
int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs
unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to
///< hipFlushGPUDirectRDMAWritesOptions
int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering
unsigned int
memoryPoolSupportedHandleTypes; ///< Bitmask of handle types support with mempool based IPC
int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP
///< mipmapped arrays
int ipcEventSupported; ///< Device supports IPC events
int clusterLaunch; ///< Device supports cluster launch
int unifiedFunctionPointers; ///< Indicates device supports unified function pointers
int reserved[63]; ///< CUDA Reserved.
int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA.
/* HIP Only struct members */
char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
///< instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched shared memories
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
int asicRevision; ///< Revision of the GPU in this device
} hipDeviceProp_t;
typedef enum HIPpointer_attribute_enum {
HIP_POINTER_ATTRIBUTE_CONTEXT = 1, ///< The context on which a pointer was allocated
///< @warning - not supported in HIP
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, ///< memory type describing location of a pointer
HIP_POINTER_ATTRIBUTE_DEVICE_POINTER,///< address at which the pointer is allocated on device
HIP_POINTER_ATTRIBUTE_HOST_POINTER, ///< address at which the pointer is allocated on host
HIP_POINTER_ATTRIBUTE_P2P_TOKENS, ///< A pair of tokens for use with linux kernel interface
///< @warning - not supported in HIP
HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, ///< Synchronize every synchronous memory operation
///< initiated on this region
HIP_POINTER_ATTRIBUTE_BUFFER_ID, ///< Unique ID for an allocated memory region
HIP_POINTER_ATTRIBUTE_IS_MANAGED, ///< Indicates if the pointer points to managed memory
HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL,///< device ordinal of a device on which a pointer
///< was allocated or registered
HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation
///< that is suitable for hipIpcGetMemHandle
///< @warning - not supported in HIP
HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR,///< Starting address for this requested pointer
HIP_POINTER_ATTRIBUTE_RANGE_SIZE, ///< Size of the address range for this requested pointer
HIP_POINTER_ATTRIBUTE_MAPPED, ///< tells if this pointer is in a valid address range
///< that is mapped to a backing allocation
HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,///< Bitmask of allowed hipmemAllocationHandleType
///< for this allocation @warning - not supported in HIP
HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by
///< this pointer can be used with the GPUDirect RDMA API
///< @warning - not supported in HIP
HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS, ///< Returns the access flags the device associated with
///< for the corresponding memory referenced by the ptr
HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE ///< Returns the mempool handle for the allocation if
///< it was allocated from a mempool
///< @warning - not supported in HIP
} HIPpointer_attribute;
typedef enum hipComputeMode {
hipComputeModeDefault = 0,
hipComputeModeExclusive = 1,
hipComputeModeProhibited = 2,
hipComputeModeExclusiveProcess = 3,
} hipComputeMode;
typedef struct HIP_MEMCPY3D {
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
size_t srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray srcArray;
size_t srcPitch;
size_t srcHeight;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
size_t dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray dstArray;
size_t dstPitch;
size_t dstHeight;
size_t WidthInBytes;
size_t Height;
size_t Depth;
} HIP_MEMCPY3D;
#if 0 //version 5 and 6 are the same but the structure already in hipew use doesn't match the structure in either version! but matches CUDA ew
//need to check further
typedef struct hipTextureDesc {
enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
enum hipTextureFilterMode filterMode;
enum hipTextureReadMode readMode;
int sRGB; // Perform sRGB->linear conversion during texture read
float borderColor[4];
int normalizedCoords;
unsigned int maxAnisotropy;
enum hipTextureFilterMode mipmapFilterMode;
float mipmapLevelBias;
float minMipmapLevelClamp;
float maxMipmapLevelClamp;
}hipTextureDesc;
#endif
typedef enum hipExternalMemoryHandleType_enum {
hipExternalMemoryHandleTypeOpaqueFd = 1,
hipExternalMemoryHandleTypeOpaqueWin32 = 2,
hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
hipExternalMemoryHandleTypeD3D12Heap = 4,
hipExternalMemoryHandleTypeD3D12Resource = 5,
hipExternalMemoryHandleTypeD3D11Resource = 6,
hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
hipExternalMemoryHandleTypeNvSciBuf = 8
} hipExternalMemoryHandleType;
typedef struct hipExternalMemoryHandleDesc_st {
hipExternalMemoryHandleType type;
union {
int fd;
struct {
void *handle;
const void *name;
} win32;
const void *nvSciBufObject;
} handle;
unsigned long long size;
unsigned int flags;
unsigned int reserved[16];
} hipExternalMemoryHandleDesc;
typedef struct hipExternalMemoryBufferDesc_st {
unsigned long long offset;
unsigned long long size;
unsigned int flags;
unsigned int reserved[16];
} hipExternalMemoryBufferDesc;
typedef hipError_t HIPAPI thipGetDevicePropertiesR0600(hipDeviceProp_t* props, int deviceId);
extern thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600;
#define hipGetDeviceProperties hipGetDevicePropertiesR0600
#endif /* __HIPEW_H__ */

View file

@ -38,7 +38,11 @@ thipDriverGetVersion *hipDriverGetVersion;
thipRuntimeGetVersion *hipRuntimeGetVersion; thipRuntimeGetVersion *hipRuntimeGetVersion;
thipGetDevice *hipGetDevice; thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount; thipGetDeviceCount *hipGetDeviceCount;
thipGetDeviceProperties *hipGetDeviceProperties; #ifdef WITH_HIP_SDK_5
thipGetDeviceProperties *hipGetDeviceProperties;
#else
thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600;
#endif
thipDeviceGet* hipDeviceGet; thipDeviceGet* hipDeviceGet;
thipDeviceGetName *hipDeviceGetName; thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute; thipDeviceGetAttribute *hipDeviceGetAttribute;
@ -233,19 +237,25 @@ static int hipewHipInit(void) {
/* Library paths. */ /* Library paths. */
#ifdef _WIN32 #ifdef _WIN32
/* Expected in C:/Windows/System32 or similar, no path needed. */ /* Expected in C:/Windows/System32 or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL}; const char *hip_paths[] = {WIN_DRIVER, NULL};
#elif defined(__APPLE__) #elif defined(__APPLE__)
/* Default installation path. */ /* Default installation path. */
const char *hip_paths[] = {"", NULL}; const char *hip_paths[] = {"", NULL};
#else #else
/* ROCm 6 changes paths from /opt/rocm/hip/lib to /opt/rocm/lib, so /* ROCm 6 changes paths from /opt/rocm/hip/lib to /opt/rocm/lib, so
* search for libraries there. It still includes .so.5. */ * search for libraries there. It still includes .so.5. */
#ifdef WITH_HIP_SDK_5
const char *hip_paths[] = {"libamdhip64.so.5", const char *hip_paths[] = {"libamdhip64.so.5",
"/opt/rocm/lib/libamdhip64.so.5", "/opt/rocm/lib/libamdhip64.so.5",
"/opt/rocm/hip/lib/libamdhip64.so.5", "/opt/rocm/hip/lib/libamdhip64.so.5",
"libamdhip64.so", NULL};
"/opt/rocm/lib/libamdhip64.so", #else
"/opt/rocm/hip/lib/libamdhip64.so", NULL}; const char *hip_paths[] = {"libamdhip64.so.6",
"/opt/rocm/lib/libamdhip64.so.6",
"/opt/rocm/hip/lib/libamdhip64.so.6",
NULL};
#endif
#endif #endif
static int initialized = 0; static int initialized = 0;
static int result = 0; static int result = 0;
@ -280,6 +290,11 @@ static int hipewHipInit(void) {
} }
/* Fetch all function pointers. */ /* Fetch all function pointers. */
#ifdef WITH_HIP_SDK_5
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
#else
HIP_LIBRARY_FIND_CHECKED(hipGetDevicePropertiesR0600);
#endif
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName); HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipGetErrorString); HIP_LIBRARY_FIND_CHECKED(hipGetErrorString);
HIP_LIBRARY_FIND_CHECKED(hipGetLastError); HIP_LIBRARY_FIND_CHECKED(hipGetLastError);
@ -288,7 +303,6 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipRuntimeGetVersion); HIP_LIBRARY_FIND_CHECKED(hipRuntimeGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice); HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount); HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGet); HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
@ -411,28 +425,6 @@ static int hipewHipInit(void) {
return result; return result;
} }
hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version) {
/** Convert hipMemoryType for backwards compatibility with rocm5/6.
* This can be removed when support for ROCm 5 is removed. */
/* If version is 5 we need to use the old enum vals (60000000 is start of ROCm 6) */
if (runtime_version > 60000000) {
return mem_type;
}
switch (mem_type) {
case hipMemoryTypeHost:
return hipMemoryTypeHost_v5;
case hipMemoryTypeDevice:
return hipMemoryTypeDevice_v5;
case hipMemoryTypeArray:
return hipMemoryTypeArray_v5;
case hipMemoryTypeUnified:
return hipMemoryTypeUnified_v5;
default:
return hipMemoryTypeUnregistered; /* This should not happen. */
}
}
int hipewInit(hipuint32_t flags) { int hipewInit(hipuint32_t flags) {
int result = HIPEW_SUCCESS; int result = HIPEW_SUCCESS;

View file

@ -1749,7 +1749,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=rpt_("and AMD Radeon Pro %s driver or newer") % driver_version, col.label(text=rpt_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False) icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"): elif sys.platform.startswith("linux"):
driver_version = "22.10" driver_version = "23.40"
col.label( col.label(
text=rpt_("Requires AMD GPU with RDNA architecture"), text=rpt_("Requires AMD GPU with RDNA architecture"),
icon='BLANK1', icon='BLANK1',

View file

@ -313,16 +313,17 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
return string(); return string();
} }
const int hipcc_hip_version = hipewCompilerVersion(); # ifdef WITH_HIP_SDK_5
VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << "."; int hip_major_ver = hipRuntimeVersion / 10000000;
if (hipcc_hip_version < 40) { if (hip_major_ver > 5) {
printf( set_error(string_printf(
"Unsupported HIP version %d.%d detected, " "HIP Runtime version %d does not work with kernels compiled with HIP SDK 5\n",
"you need HIP 4.0 or newer.\n", hip_major_ver));
hipcc_hip_version / 10,
hipcc_hip_version % 10);
return string(); return string();
} }
# endif
const int hipcc_hip_version = hipewCompilerVersion();
VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
double starttime = time_dt(); double starttime = time_dt();
@ -755,9 +756,9 @@ void HIPDevice::tex_alloc(device_texture &mem)
HIP_MEMCPY3D param; HIP_MEMCPY3D param;
memset(&param, 0, sizeof(HIP_MEMCPY3D)); memset(&param, 0, sizeof(HIP_MEMCPY3D));
param.dstMemoryType = get_memory_type(hipMemoryTypeArray); param.dstMemoryType = hipMemoryTypeArray;
param.dstArray = array_3d; param.dstArray = array_3d;
param.srcMemoryType = get_memory_type(hipMemoryTypeHost); param.srcMemoryType = hipMemoryTypeHost;
param.srcHost = mem.host_pointer; param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch; param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch; param.WidthInBytes = param.srcPitch;
@ -787,10 +788,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
hip_Memcpy2D param; hip_Memcpy2D param;
memset(&param, 0, sizeof(param)); memset(&param, 0, sizeof(param));
param.dstMemoryType = get_memory_type(hipMemoryTypeDevice); param.dstMemoryType = hipMemoryTypeDevice;
param.dstDevice = mem.device_pointer; param.dstDevice = mem.device_pointer;
param.dstPitch = dst_pitch; param.dstPitch = dst_pitch;
param.srcMemoryType = get_memory_type(hipMemoryTypeHost); param.srcMemoryType = hipMemoryTypeHost;
param.srcHost = mem.host_pointer; param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch; param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch; param.WidthInBytes = param.srcPitch;
@ -978,11 +979,6 @@ int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int
return value; return value;
} }
hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
{
return get_hip_memory_type(mem_type, hipRuntimeVersion);
}
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif #endif

View file

@ -103,7 +103,6 @@ class HIPDevice : public GPUDevice {
protected: protected:
bool get_device_attribute(hipDeviceAttribute_t attribute, int *value); bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value); int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
hipMemoryType get_memory_type(hipMemoryType mem_type);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

@ -1 +1 @@
Subproject commit a2917f7e221fc1f81c4f018788ff51c619417413 Subproject commit 4f7c423914f5c422bd4709e9e5a969fd87bb855e