From 4e5a9c5dfb582b03d6e11d751e5a5c7e1bcdda9d Mon Sep 17 00:00:00 2001 From: salipourto Date: Tue, 17 Dec 2024 16:19:36 +0100 Subject: [PATCH] Cycles: Handling SDK/ROCm 6+ lack of backward compatibility with pre ROCm 6 This commit introduces proper handling of ROCm 5 and ROCm 6 runtimes on Linux, based on the version of the ROCm compiler used at build time. Previously, HIPEW (the HIP equivalent of CUEW, the CUDA Extension Wrangler) defaulted to loading the ROCm 5 runtime. If ROCm 5 was unavailable, it would attempt to load ROCm 6. However, ROCm 6 introduces changes in certain structures and functions that are not backward compatible, leading to potential issues when kernels compiled with the ROCm 6 compiler are executed on the ROCm 5 runtime. ### Summary of Changes: **Separation of Structures and Functions:** Structures and functions are now separated into hipew5 and hipew6 to accommodate the differences between ROCm versions. **Build-Time Version Detection:** The ROCm version is determined during build time, and the corresponding hipew5 or hipew6 is included accordingly. **Runtime Default to ROCm 6:** By default, HIPEW now loads the ROCm 6 runtime and includes hipew6 (Linux only). **JIT Compilation Behavior:** Since ROCm 6 is the default version, JIT compilation is supported only when the ROCm 6 compiler is detected at runtime. **HIP-RT Update:** HIP-RT has been updated to load the ROCm 6 runtime by default. These changes ensure compatibility and stability when switching between ROCm versions, avoiding issues caused by runtime and compiler mismatches. 
Co-authored-by: Alaska Co-authored-by: Sergey Sharybin Pull Request: https://projects.blender.org/blender/blender/pulls/130153 --- .../build_environment/cmake/hiprt.cmake | 13 +- extern/hipew/CMakeLists.txt | 8 + extern/hipew/include/hipew.h | 426 +++------------- extern/hipew/include/hipew5.h | 313 ++++++++++++ extern/hipew/include/hipew6.h | 462 ++++++++++++++++++ extern/hipew/src/hipew.c | 54 +- intern/cycles/blender/addon/properties.py | 2 +- intern/cycles/device/hip/device_impl.cpp | 30 +- intern/cycles/device/hip/device_impl.h | 1 - lib/linux_x64 | 2 +- 10 files changed, 901 insertions(+), 410 deletions(-) create mode 100644 extern/hipew/include/hipew5.h create mode 100644 extern/hipew/include/hipew6.h diff --git a/build_files/build_environment/cmake/hiprt.cmake b/build_files/build_environment/cmake/hiprt.cmake index b835b14558c..618e83bfe19 100644 --- a/build_files/build_environment/cmake/hiprt.cmake +++ b/build_files/build_environment/cmake/hiprt.cmake @@ -14,9 +14,20 @@ set(HIPRT_EXTRA_ARGS -DBITCODE=ON -DGENERATE_BAKE_KERNEL=OFF -DNO_UNITTEST=ON - -DHIPRT_PREFER_HIP_5=ON ) +if(WIN32) + # Windows is currently defaulting to HIP 5 for the buildbot and the + # dependency build environment. + list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=ON) +else() + # Linux uses HIP 6 by default in those environments, but it had + # -DHIPRT_PREFER_HIP_5=ON passed to the dependency builder in its + # initial implementation. Force it OFF so that incremental builds + # in an existing build environment do the right thing. 
+ list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=OFF) +endif() + set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt) set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build) diff --git a/extern/hipew/CMakeLists.txt b/extern/hipew/CMakeLists.txt index ee216b78beb..a113813b2fc 100644 --- a/extern/hipew/CMakeLists.txt +++ b/extern/hipew/CMakeLists.txt @@ -14,6 +14,8 @@ set(INC_SYS set(SRC src/hipew.c include/hipew.h + include/hipew6.h + include/hipew5.h ) set(LIB @@ -32,4 +34,10 @@ if(HIPRT_INCLUDE_DIR) ) endif() + blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") + +find_package(HIP 5.5.0) +if(HIP_VERSION_MAJOR EQUAL 5) + target_compile_definitions(extern_hipew PUBLIC WITH_HIP_SDK_5) +endif() diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h index 52bc26f8bbd..e87daaec213 100644 --- a/extern/hipew/include/hipew.h +++ b/extern/hipew/include/hipew.h @@ -23,6 +23,7 @@ extern "C" { #include + #define HIP_IPC_HANDLE_SIZE 64 #define hipHostMallocDefault 0x00 #define hipHostMallocPortable 0x01 @@ -116,13 +117,6 @@ typedef struct HIPuuid_st { char bytes[16]; } HIPuuid; -typedef enum hipMemcpyKind { - hipMemcpyHostToHost = 0, - hipMemcpyHostToDevice = 1, - hipMemcpyDeviceToHost = 2, - hipMemcpyDeviceToDevice = 3, - hipMemcpyDefault = 4 -} hipMemcpyKind; typedef enum hipChannelFormatKind { hipChannelFormatKindSigned = 0, @@ -167,7 +161,7 @@ typedef enum hipTextureAddressMode { */ typedef struct textureReference { int normalized; - //enum hipTextureReadMode readMode;// used only for driver API's + //enum hipTextureReadMode readMode;// used only for driver API's // why is this commented out? enum hipTextureFilterMode filterMode; enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions struct hipChannelFormatDesc channelDesc; @@ -185,39 +179,6 @@ typedef struct textureReference { typedef textureReference* hipTexRef; -/** - * ROCm 6 and ROCm 5 memory types are different. 
- * For now, we include both in the enum and then use the get_hip_memory_type - * Function to convert. When removing ROCm 5 compatibility this can be simplified. -*/ -typedef enum hipMemoryType { - hipMemoryTypeHost_v5 = 0x00, - hipMemoryTypeDevice_v5 = 0x01, - hipMemoryTypeArray_v5 = 0x02, - hipMemoryTypeUnified_v5 = 0x03, - hipMemoryTypeUnregistered = 0, - hipMemoryTypeHost = 1, - hipMemoryTypeDevice = 2, - hipMemoryTypeManaged = 3, - hipMemoryTypeArray = 10, - hipMemoryTypeUnified = 11, -} hipMemoryType; - -hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version); - -/** - * Pointer attributes - */ -typedef struct hipPointerAttribute_t { - enum hipMemoryType memoryType; - int device; - void* devicePointer; - void* hostPointer; - int isManaged; - unsigned allocationFlags; /* flags specified when memory was allocated*/ - /* peers? */ -} hipPointerAttribute_t; - typedef struct ihipIpcEventHandle_t { char reserved[HIP_IPC_HANDLE_SIZE]; } ihipIpcEventHandle_t; @@ -314,128 +275,6 @@ typedef enum HIPoccupancy_flags_enum { HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1, } HIPoccupancy_flags; -typedef enum hipDeviceAttribute_t { - hipDeviceAttributeCudaCompatibleBegin = 0, - hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. - hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes. - hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number. - hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space - hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory - ///< at the same virtual address as the CPU - hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. - hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. - hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. 
Device supports Compute Preemption. - hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently. - hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU - hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch - hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices - hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel. - ///< Deprecated. Use instead asyncEngineCount. - hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on - ///< the device without migration - hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1 - hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations - hipDeviceAttributeIntegrated, ///< Device is integrated GPU - hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. - hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device - hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. - hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported - hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms - hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms - hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. - hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system - hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor - hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. 
- hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. - hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. - hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. - hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. - hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. - hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. - hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. - hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. - hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface. - hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. - hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. - hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface. - hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. - hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture. - hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture. - ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda. - hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture. - hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. - hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture. - hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed. - hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture. - hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory. - hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. 
Maximum dimensions of 2D mipmapped texture. - hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. - hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. - hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. - hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture. - hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture - hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture. - hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block - hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. - hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. - hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies - hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. - hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. - hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. - hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board - hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. - hipDeviceAttributeName, ///< Device name. - hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory - ///< without calling hipHostRegister on it - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables - hipDeviceAttributePciBusId, ///< PCI Bus ID. - hipDeviceAttributePciDeviceId, ///< PCI Device ID. - hipDeviceAttributePciDomainID, ///< PCI Domain ID. - hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. 
Maximum l2 persisting lines capacity in bytes - hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared - ///< by all thread blocks simultaneously resident on a multiprocessor. - hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block. - hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block. - hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. - hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in. - hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor. - hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision. - hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities. - hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces - hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver - hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures - hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory; - hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. - hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice. - hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host. - hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte. - hipDeviceAttributeWarpSize, ///< Warp size in threads. 
- hipDeviceAttributeCudaCompatibleEnd = 9999, - hipDeviceAttributeAmdSpecificBegin = 10000, - hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*" - hipDeviceAttributeArch, ///< Device architecture - hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor. - hipDeviceAttributeGcnArch, ///< Device gcn architecture - hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes - hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register - hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register - hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple - ///< devices with unmatched functions - hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple - ///< devices with unmatched grid dimensions - hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple - ///< devices with unmatched block dimensions - hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple - ///< devices with unmatched shared memories - hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar - hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device - hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and - ///< hipStreamWaitValue64() , '0' otherwise. - hipDeviceAttributeAmdSpecificEnd = 19999, - hipDeviceAttributeVendorSpecificBegin = 20000, - hipDeviceAttribute - // Extended attributes for vendors -} hipDeviceAttribute_t; typedef struct HIPdevprop_st { int maxThreadsPerBlock; @@ -481,84 +320,6 @@ typedef struct { unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. 
} hipDeviceArch_t; -typedef struct hipDeviceProp_t { - char name[256]; ///< Device name. - size_t totalGlobalMem; ///< Size of global memory region (in bytes). - size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). - int regsPerBlock; ///< Registers per block. - int warpSize; ///< Warp size. - int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. - int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. - int maxGridSize[3]; ///< Max grid dimensions (XYZ). - int clockRate; ///< Max clock frequency of the multiProcessors in khz. - int memoryClockRate; ///< Max global memory clock frequency in khz. - int memoryBusWidth; ///< Global memory bus width in bits. - size_t totalConstMem; ///< Size of shared memory region (in bytes). - int major; ///< Major compute capability. On HCC, this is an approximation and features may - ///< differ from CUDA CC. See the arch feature flags for portable ways to query - ///< feature caps. - int minor; ///< Minor compute capability. On HCC, this is an approximation and features may - ///< differ from CUDA CC. See the arch feature flags for portable ways to query - ///< feature caps. - int multiProcessorCount; ///< Number of multi-processors (compute units). - int l2CacheSize; ///< L2 cache size. - int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. - int computeMode; ///< Compute mode. - int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" - ///< instructions. New for HIP. - hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. - int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. - int pciDomainID; ///< PCI Domain ID - int pciBusID; ///< PCI Bus ID. - int pciDeviceID; ///< PCI Device ID. - size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. 
- int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. - int canMapHostMemory; ///< Check whether HIP can map host memory - int gcnArch; ///< DEPRECATED: use gcnArchName instead - char gcnArchName[256]; ///< AMD GCN Arch Name. - int integrated; ///< APU vs dGPU - int cooperativeLaunch; ///< HIP device supports cooperative launch - int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices - int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory - int maxTexture1D; ///< Maximum number of elements in 1D images - int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements - int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements - unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register - unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register - size_t memPitch; ///linear conversion during texture read + float borderColor[4]; + int normalizedCoords; + unsigned int maxAnisotropy; + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; +}hipTextureDesc; +#endif + +typedef enum hipExternalMemoryHandleType_enum { + hipExternalMemoryHandleTypeOpaqueFd = 1, + hipExternalMemoryHandleTypeOpaqueWin32 = 2, + hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3, + hipExternalMemoryHandleTypeD3D12Heap = 4, + hipExternalMemoryHandleTypeD3D12Resource = 5, + hipExternalMemoryHandleTypeD3D11Resource = 6, + hipExternalMemoryHandleTypeD3D11ResourceKmt = 7, + hipExternalMemoryHandleTypeNvSciBuf = 8 +} hipExternalMemoryHandleType; + +typedef struct hipExternalMemoryHandleDesc_st { + hipExternalMemoryHandleType type; + union { + int fd; + struct { + void *handle; + const void *name; + } win32; + const void *nvSciBufObject; + } handle; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; 
+} hipExternalMemoryHandleDesc; + +typedef struct hipExternalMemoryBufferDesc_st { + unsigned long long offset; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryBufferDesc; + + +typedef hipError_t HIPAPI thipGetDevicePropertiesR0600(hipDeviceProp_t* props, int deviceId); +extern thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600; + +#define hipGetDeviceProperties hipGetDevicePropertiesR0600 + + +#endif /* __HIPEW_H__ */ diff --git a/extern/hipew/src/hipew.c b/extern/hipew/src/hipew.c index 5259f8b2f31..d6fdc034979 100644 --- a/extern/hipew/src/hipew.c +++ b/extern/hipew/src/hipew.c @@ -38,7 +38,11 @@ thipDriverGetVersion *hipDriverGetVersion; thipRuntimeGetVersion *hipRuntimeGetVersion; thipGetDevice *hipGetDevice; thipGetDeviceCount *hipGetDeviceCount; -thipGetDeviceProperties *hipGetDeviceProperties; +#ifdef WITH_HIP_SDK_5 + thipGetDeviceProperties *hipGetDeviceProperties; +#else + thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600; +#endif thipDeviceGet* hipDeviceGet; thipDeviceGetName *hipDeviceGetName; thipDeviceGetAttribute *hipDeviceGetAttribute; @@ -233,19 +237,25 @@ static int hipewHipInit(void) { /* Library paths. */ #ifdef _WIN32 /* Expected in C:/Windows/System32 or similar, no path needed. */ - const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL}; + const char *hip_paths[] = {WIN_DRIVER, NULL}; #elif defined(__APPLE__) /* Default installation path. */ const char *hip_paths[] = {"", NULL}; #else /* ROCm 6 changes paths from /opt/rocm/hip/lib to /opt/rocm/lib, so * search for libraries there. It still includes .so.5. 
*/ - const char *hip_paths[] = {"libamdhip64.so.5", - "/opt/rocm/lib/libamdhip64.so.5", - "/opt/rocm/hip/lib/libamdhip64.so.5", - "libamdhip64.so", - "/opt/rocm/lib/libamdhip64.so", - "/opt/rocm/hip/lib/libamdhip64.so", NULL}; + #ifdef WITH_HIP_SDK_5 + const char *hip_paths[] = {"libamdhip64.so.5", + "/opt/rocm/lib/libamdhip64.so.5", + "/opt/rocm/hip/lib/libamdhip64.so.5", + NULL}; + #else + const char *hip_paths[] = {"libamdhip64.so.6", + "/opt/rocm/lib/libamdhip64.so.6", + "/opt/rocm/hip/lib/libamdhip64.so.6", + NULL}; + + #endif #endif static int initialized = 0; static int result = 0; @@ -280,6 +290,11 @@ static int hipewHipInit(void) { } /* Fetch all function pointers. */ +#ifdef WITH_HIP_SDK_5 + HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties); +#else + HIP_LIBRARY_FIND_CHECKED(hipGetDevicePropertiesR0600); +#endif HIP_LIBRARY_FIND_CHECKED(hipGetErrorName); HIP_LIBRARY_FIND_CHECKED(hipGetErrorString); HIP_LIBRARY_FIND_CHECKED(hipGetLastError); @@ -288,7 +303,6 @@ static int hipewHipInit(void) { HIP_LIBRARY_FIND_CHECKED(hipRuntimeGetVersion); HIP_LIBRARY_FIND_CHECKED(hipGetDevice); HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount); - HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties); HIP_LIBRARY_FIND_CHECKED(hipDeviceGet); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute); @@ -411,28 +425,6 @@ static int hipewHipInit(void) { return result; } -hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version) { - /** Convert hipMemoryType for backwards compatibility with rocm5/6. - * This can be removed when support for ROCm 5 is removed. 
*/ - - /* If version is 5 we need to use the old enum vals (60000000 is start of ROCm 6) */ - if (runtime_version > 60000000) { - return mem_type; - } - - switch (mem_type) { - case hipMemoryTypeHost: - return hipMemoryTypeHost_v5; - case hipMemoryTypeDevice: - return hipMemoryTypeDevice_v5; - case hipMemoryTypeArray: - return hipMemoryTypeArray_v5; - case hipMemoryTypeUnified: - return hipMemoryTypeUnified_v5; - default: - return hipMemoryTypeUnregistered; /* This should not happen. */ - } -} int hipewInit(hipuint32_t flags) { int result = HIPEW_SUCCESS; diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 631907ae2dc..27e93ec7864 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1749,7 +1749,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): col.label(text=rpt_("and AMD Radeon Pro %s driver or newer") % driver_version, icon='BLANK1', translate=False) elif sys.platform.startswith("linux"): - driver_version = "22.10" + driver_version = "23.40" col.label( text=rpt_("Requires AMD GPU with RDNA architecture"), icon='BLANK1', diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 1ab0beea822..01a45fb29fd 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -313,16 +313,17 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c return string(); } - const int hipcc_hip_version = hipewCompilerVersion(); - VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << "."; - if (hipcc_hip_version < 40) { - printf( - "Unsupported HIP version %d.%d detected, " - "you need HIP 4.0 or newer.\n", - hipcc_hip_version / 10, - hipcc_hip_version % 10); +# ifdef WITH_HIP_SDK_5 + int hip_major_ver = hipRuntimeVersion / 10000000; + if (hip_major_ver > 5) { + set_error(string_printf( + "HIP Runtime version %d does not work with 
kernels compiled with HIP SDK 5\n", + hip_major_ver)); return string(); } +# endif + const int hipcc_hip_version = hipewCompilerVersion(); + VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << "."; double starttime = time_dt(); @@ -755,9 +756,9 @@ void HIPDevice::tex_alloc(device_texture &mem) HIP_MEMCPY3D param; memset(¶m, 0, sizeof(HIP_MEMCPY3D)); - param.dstMemoryType = get_memory_type(hipMemoryTypeArray); + param.dstMemoryType = hipMemoryTypeArray; param.dstArray = array_3d; - param.srcMemoryType = get_memory_type(hipMemoryTypeHost); + param.srcMemoryType = hipMemoryTypeHost; param.srcHost = mem.host_pointer; param.srcPitch = src_pitch; param.WidthInBytes = param.srcPitch; @@ -787,10 +788,10 @@ void HIPDevice::tex_alloc(device_texture &mem) hip_Memcpy2D param; memset(¶m, 0, sizeof(param)); - param.dstMemoryType = get_memory_type(hipMemoryTypeDevice); + param.dstMemoryType = hipMemoryTypeDevice; param.dstDevice = mem.device_pointer; param.dstPitch = dst_pitch; - param.srcMemoryType = get_memory_type(hipMemoryTypeHost); + param.srcMemoryType = hipMemoryTypeHost; param.srcHost = mem.host_pointer; param.srcPitch = src_pitch; param.WidthInBytes = param.srcPitch; @@ -978,11 +979,6 @@ int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int return value; } -hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type) -{ - return get_hip_memory_type(mem_type, hipRuntimeVersion); -} - CCL_NAMESPACE_END #endif diff --git a/intern/cycles/device/hip/device_impl.h b/intern/cycles/device/hip/device_impl.h index 5078cc9bb94..b230ff12d3c 100644 --- a/intern/cycles/device/hip/device_impl.h +++ b/intern/cycles/device/hip/device_impl.h @@ -103,7 +103,6 @@ class HIPDevice : public GPUDevice { protected: bool get_device_attribute(hipDeviceAttribute_t attribute, int *value); int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value); - hipMemoryType get_memory_type(hipMemoryType mem_type); }; 
CCL_NAMESPACE_END diff --git a/lib/linux_x64 b/lib/linux_x64 index a2917f7e221..4f7c423914f 160000 --- a/lib/linux_x64 +++ b/lib/linux_x64 @@ -1 +1 @@ -Subproject commit a2917f7e221fc1f81c4f018788ff51c619417413 +Subproject commit 4f7c423914f5c422bd4709e9e5a969fd87bb855e