Cycles: Handling SDK/ROCm 6+ lack of backward compatibility with pre ROCm 6

This commit introduces proper handling of ROCm 5 and ROCm 6 runtimes on Linux, based on the version of the ROCm compiler used at build time.

Previously, HIPEW (the HIP equivalent of the CUDA Wrangler) defaulted to loading the ROCm 5 runtime. If ROCm 5 was unavailable, it would attempt to load ROCm 6. However, ROCm 6 introduces changes in certain structures and functions that are not backward compatible, leading to potential issues when kernels compiled with the ROCm 6 compiler are executed on the ROCm 5 runtime.

### Summary of Changes:

**Separation of Structures and Functions:** Structures and functions are now separated into hipew5 and hipew6 to accommodate the differences between ROCm versions.

**Build-Time Version Detection:** The ROCm version is determined during build time, and the corresponding hipew5 or hipew6 is included accordingly.

**Runtime Default to ROCm 6:** By default, HIPEW now loads the ROCm 6 runtime and includes hipew6 (Linux only).

**JIT Compilation Behavior:** Since ROCm 6 is the default version, JIT compilation is supported only when the ROCm 6 compiler is detected at runtime.

**HIP-RT Update:** HIP-RT has been updated to load the ROCm 6 runtime by default.

These changes ensure compatibility and stability when switching between ROCm versions, avoiding issues caused by runtime and compiler mismatches.

Co-authored-by: Alaska <alaskayou01@gmail.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>
Pull Request: https://projects.blender.org/blender/blender/pulls/130153
2025-01-22 15:32:15 -05:00 · 2024-12-17 16:19:36 +01:00 · 2024-12-17 16:19:36 +01:00 · 4e5a9c5dfb
commit 4e5a9c5dfb
parent cd13f52e75
10 changed files with 901 additions and 410 deletions
--- a/build_files/build_environment/cmake/hiprt.cmake
+++ b/build_files/build_environment/cmake/hiprt.cmake
@ -14,9 +14,20 @@ set(HIPRT_EXTRA_ARGS
  -DBITCODE=ON
  -DGENERATE_BAKE_KERNEL=OFF
  -DNO_UNITTEST=ON
  -DHIPRT_PREFER_HIP_5=ON
 )
 if(WIN32)
  # Windows currently defaults to HIP 5 on the buildbot and in the
  # dependency build environment.
  list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=ON)
 else()
  # Linux uses HIP 6 by default in those environments, but its initial
  # implementation passed -DHIPRT_PREFER_HIP_5=ON to the dependency
  # builder. Force the option off explicitly so that an incremental build
  # in an existing build environment does the right thing.
  list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=OFF)
 endif()
 set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt)
 set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build)
--- a/extern/hipew/CMakeLists.txt
+++ b/extern/hipew/CMakeLists.txt
@ -14,6 +14,8 @@ set(INC_SYS
 # Files that make up the HIPEW library: the C source, the common header and
 # the two ROCm-version-specific headers.
 set(SRC
  src/hipew.c
  include/hipew.h
  include/hipew6.h
  include/hipew5.h
 )
 set(LIB
@ -32,4 +34,10 @@ if(HIPRT_INCLUDE_DIR)
  )
 endif()
 blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
 # Look for a HIP SDK (requesting version 5.5.0) once the target exists, so
 # that the compile definition below can be attached to it.
 find_package(HIP 5.5.0)
 # Only an SDK whose major version is 5 gets WITH_HIP_SDK_5; hipew.h includes
 # hipew5.h when it is defined and hipew6.h otherwise. The definition is
 # PUBLIC, so it is also propagated to targets linking against extern_hipew.
 if(HIP_VERSION_MAJOR EQUAL 5)
  target_compile_definitions(extern_hipew PUBLIC WITH_HIP_SDK_5)
 endif()
--- a/extern/hipew/include/hipew.h
+++ b/extern/hipew/include/hipew.h
@ -23,6 +23,7 @@ extern "C" {
 #include <stdlib.h>
 #define HIP_IPC_HANDLE_SIZE 64
 #define hipHostMallocDefault 0x00
 #define hipHostMallocPortable 0x01
@ -116,13 +117,6 @@ typedef struct HIPuuid_st {
  char bytes[16];
 } HIPuuid;
 typedef enum hipMemcpyKind {
    hipMemcpyHostToHost = 0,
    hipMemcpyHostToDevice = 1,
    hipMemcpyDeviceToHost = 2,
    hipMemcpyDeviceToDevice = 3,
    hipMemcpyDefault = 4
 } hipMemcpyKind;
 typedef enum hipChannelFormatKind {
    hipChannelFormatKindSigned = 0,
@ -167,7 +161,7 @@ typedef enum hipTextureAddressMode {
 */
 typedef struct textureReference {
    int normalized;
-    //enum hipTextureReadMode readMode;// used only for driver API's
+    //enum hipTextureReadMode readMode;// used only for driver API's //why is this commented out?
    enum hipTextureFilterMode filterMode;
    enum hipTextureAddressMode addressMode[3];  // Texture address mode for up to 3 dimensions
    struct hipChannelFormatDesc channelDesc;
@ -185,39 +179,6 @@ typedef struct textureReference {
 /* Pointer alias for the textureReference struct. */
 typedef textureReference* hipTexRef;
 /**
 * ROCm 6 and ROCm 5 memory types are different.
 * For now, we include both in the enum and then use the get_hip_memory_type
 * Function to convert. When removing ROCm 5 compatibility this can be simplified.
 */
 typedef enum hipMemoryType {
  hipMemoryTypeHost_v5 = 0x00,
  hipMemoryTypeDevice_v5 = 0x01,
  hipMemoryTypeArray_v5 = 0x02,
  hipMemoryTypeUnified_v5 = 0x03,  
  hipMemoryTypeUnregistered = 0,
  hipMemoryTypeHost = 1,
  hipMemoryTypeDevice = 2,
  hipMemoryTypeManaged = 3,
  hipMemoryTypeArray = 10,
  hipMemoryTypeUnified = 11,
 } hipMemoryType;
 hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version);
 /**
  * Pointer attributes: memory type, device, device/host pointers, managed
  * flag and allocation flags.
  */
 struct hipPointerAttribute_t {
   enum hipMemoryType memoryType;
   int device;
   void *devicePointer, *hostPointer;
   int isManaged;
   unsigned int allocationFlags; /* Flags specified when the memory was allocated. */
   /* peers? */
 };
 typedef struct hipPointerAttribute_t hipPointerAttribute_t;
 /* IPC event handle: HIP_IPC_HANDLE_SIZE reserved bytes. */
 struct ihipIpcEventHandle_t {
   char reserved[HIP_IPC_HANDLE_SIZE];
 };
 typedef struct ihipIpcEventHandle_t ihipIpcEventHandle_t;
@ -314,128 +275,6 @@ typedef enum HIPoccupancy_flags_enum {
  HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
 } HIPoccupancy_flags;
 /**
  * Device attribute identifiers.
  *
  * Most enumerators have no explicit value and are numbered by their position
  * after the preceding `...Begin` marker, so the order of entries in this list
  * determines their values and must not be changed.
  */
 typedef enum hipDeviceAttribute_t {
  hipDeviceAttributeCudaCompatibleBegin = 0,
  hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
  hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
  hipDeviceAttributeAsyncEngineCount,                 ///< Cuda only. Asynchronous engines number.
  hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
  hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
                                                      ///< at the same virtual address as the CPU
  hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
  hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
  hipDeviceAttributeComputePreemptionSupported,       ///< Cuda only. Device supports Compute Preemption.
  hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
  hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
  hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
  hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
  hipDeviceAttributeDeviceOverlap,               ///< Cuda only. Device can concurrently copy memory and execute a kernel.
                                                      ///< Deprecated. Use instead asyncEngineCount.
  hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
                                                      ///< the device without migration
  hipDeviceAttributeGlobalL1CacheSupported,           ///< Cuda only. Device supports caching globals in L1
  hipDeviceAttributeHostNativeAtomicSupported,        ///< Cuda only. Link between the device and the host supports native atomic operations
  hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
  hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
  hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
  hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
  hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
  hipDeviceAttributeLuid,                             ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeLuidDeviceNodeMask,               ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
  hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
  hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Cuda only. Max block size per multiprocessor
  hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
  hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
  hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
  hipDeviceAttributeMaxGridDimX,                      ///< Max grid size  in width.
  hipDeviceAttributeMaxGridDimY,                      ///< Max grid size  in height.
  hipDeviceAttributeMaxGridDimZ,                      ///< Max grid size  in depth.
  hipDeviceAttributeMaxSurface1D,                     ///< Maximum size of 1D surface.
  hipDeviceAttributeMaxSurface1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered surface.
  hipDeviceAttributeMaxSurface2D,                     ///< Maximum dimension (width, height) of 2D surface.
  hipDeviceAttributeMaxSurface2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered surface.
  hipDeviceAttributeMaxSurface3D,                     ///< Maximum dimension (width, height, depth) of 3D surface.
  hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
  hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
  hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
  hipDeviceAttributeMaxTexture1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered texture.
  hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
                                                      ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
  hipDeviceAttributeMaxTexture1DMipmap,               ///< Cuda only. Maximum size of 1D mipmapped texture.
  hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
  hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension height of 2D texture.
  hipDeviceAttributeMaxTexture2DGather,               ///< Cuda only. Maximum dimensions of 2D texture if gather operations are performed.
  hipDeviceAttributeMaxTexture2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered texture.
  hipDeviceAttributeMaxTexture2DLinear,               ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
  hipDeviceAttributeMaxTexture2DMipmap,               ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
  hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
  hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
  hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
  hipDeviceAttributeMaxTexture3DAlt,                  ///< Cuda only. Maximum dimensions of alternate 3D texture.
  hipDeviceAttributeMaxTextureCubemap,                ///< Cuda only. Maximum dimensions of Cubemap texture
  hipDeviceAttributeMaxTextureCubemapLayered,         ///< Cuda only. Maximum dimensions of Cubemap layered texture.
  hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
  hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
  hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
  hipDeviceAttributeMaxPitch,                         ///< Maximum pitch in bytes allowed by memory copies
  hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
  hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
  hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
  hipDeviceAttributeMultiGpuBoardGroupID,             ///< Cuda only. Unique ID of device group on the same multi-GPU board
  hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
  hipDeviceAttributeName,                             ///< Device name.
  hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
                                                      ///< without calling hipHostRegister on it
  hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
  hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
  hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
  hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
  hipDeviceAttributePersistingL2CacheMaxSize,         ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
  hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
                                                      ///< by all thread blocks simultaneously resident on a multiprocessor.
  hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block.
  hipDeviceAttributeReservedSharedMemPerBlock,        ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
  hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
  hipDeviceAttributeSharedMemPerBlockOptin,           ///< Cuda only. Maximum shared memory per block usable by special opt in.
  hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Cuda only. Shared memory available per multiprocessor.
  hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
  hipDeviceAttributeStreamPrioritiesSupported,        ///< Cuda only. Whether to support stream priorities.
  hipDeviceAttributeSurfaceAlignment,                 ///< Cuda only. Alignment requirement for surfaces
  hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
  hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
  hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
  hipDeviceAttributeTotalConstantMemory,              ///< Constant memory size in bytes.
  hipDeviceAttributeTotalGlobalMem,                   ///< Global memory available on device.
  hipDeviceAttributeUnifiedAddressing,                ///< Cuda only. A unified address space shared with the host.
  hipDeviceAttributeUuid,                             ///< Cuda only. Unique ID in 16 byte.
  hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
  hipDeviceAttributeCudaCompatibleEnd = 9999,
  hipDeviceAttributeAmdSpecificBegin = 10000,
  hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
  hipDeviceAttributeArch,                                     ///< Device architecture
  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         ///< Maximum Shared Memory PerMultiprocessor.
  hipDeviceAttributeGcnArch,                                  ///< Device gcn architecture
  hipDeviceAttributeGcnArchName,                              ///< Device gcnArch name in 256 bytes
  hipDeviceAttributeHdpMemFlushCntl,                          ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpRegFlushCntl,                          ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched functions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched grid dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched block dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched shared memories
  hipDeviceAttributeIsLargeBar,                               ///< Whether it is LargeBar
  hipDeviceAttributeAsicRevision,                             ///< Revision of the GPU in this device
  hipDeviceAttributeCanUseStreamWaitValue,                    ///< '1' if Device supports hipStreamWaitValue32() and
                                                              ///< hipStreamWaitValue64() , '0' otherwise.
  hipDeviceAttributeAmdSpecificEnd = 19999,
  hipDeviceAttributeVendorSpecificBegin = 20000,
  hipDeviceAttribute
  // NOTE(review): `hipDeviceAttribute` above is a bare enumerator (implicitly 20001, one past
  // hipDeviceAttributeVendorSpecificBegin); it looks like a truncated name - confirm.
  // Extended attributes for vendors
 } hipDeviceAttribute_t;
 typedef struct HIPdevprop_st {
  int maxThreadsPerBlock;
@ -481,84 +320,6 @@ typedef struct {
    unsigned hasDynamicParallelism : 1;  ///< Dynamic parallelism.
 } hipDeviceArch_t;
 /**
  * Device properties record; this is the type that the
  * thipGetDeviceProperties function type takes a pointer to.
  *
  * NOTE(review): the field order defines the struct layout, which presumably
  * has to match what the HIP runtime fills in - do not reorder fields.
  */
 typedef struct hipDeviceProp_t {
    char name[256];            ///< Device name.
    size_t totalGlobalMem;     ///< Size of global memory region (in bytes).
    size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
    int regsPerBlock;          ///< Registers per block.
    int warpSize;              ///< Warp size.
    int maxThreadsPerBlock;    ///< Max work items per work group or workgroup max size.
    int maxThreadsDim[3];      ///< Max number of threads in each dimension (XYZ) of a block.
    int maxGridSize[3];        ///< Max grid dimensions (XYZ).
    int clockRate;             ///< Max clock frequency of the multiProcessors in khz.
    int memoryClockRate;       ///< Max global memory clock frequency in khz.
    int memoryBusWidth;        ///< Global memory bus width in bits.
    size_t totalConstMem;      ///< Size of constant memory region (in bytes), going by the field name;
                               ///< the previous comment said "shared memory" - NOTE(review): confirm.
    int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    int multiProcessorCount;          ///< Number of multi-processors (compute units).
    int l2CacheSize;                  ///< L2 cache size.
    int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
    int computeMode;                  ///< Compute mode.
    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
                               ///< instructions.  New for HIP.
    hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
    int concurrentKernels;     ///< Device can possibly execute multiple kernels concurrently.
    int pciDomainID;           ///< PCI Domain ID
    int pciBusID;              ///< PCI Bus ID.
    int pciDeviceID;           ///< PCI Device ID.
    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per Multiprocessor.
    int isMultiGpuBoard;                      ///< 1 if device is on a multi-GPU board, 0 if not.
    int canMapHostMemory;                     ///< Check whether HIP can map host memory
    int gcnArch;                              ///< DEPRECATED: use gcnArchName instead
    char gcnArchName[256];                    ///< AMD GCN Arch Name.
    int integrated;            ///< APU vs dGPU
    int cooperativeLaunch;            ///< HIP device supports cooperative launch
    int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
    int maxTexture1DLinear;    ///< Maximum size for 1D textures bound to linear memory
    int maxTexture1D;          ///< Maximum number of elements in 1D images
    int maxTexture2D[2];       ///< Maximum dimensions (width, height) of 2D images, in image elements
    int maxTexture3D[3];       ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
    unsigned int* hdpMemFlushCntl;      ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
    unsigned int* hdpRegFlushCntl;      ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
    size_t memPitch;                 ///< Maximum pitch in bytes allowed by memory copies
    size_t textureAlignment;         ///< Alignment requirement for textures
    size_t texturePitchAlignment;    ///< Pitch alignment requirement for texture references bound to pitched memory
    int kernelExecTimeoutEnabled;    ///< Run time limit for kernels executed on the device
    int ECCEnabled;                  ///< Device has ECC support enabled
    int tccDriver;                   ///< 1:If device is Tesla device using TCC driver, else 0
    int cooperativeMultiDeviceUnmatchedFunc;        ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched functions
    int cooperativeMultiDeviceUnmatchedGridDim;     ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched grid dimensions
    int cooperativeMultiDeviceUnmatchedBlockDim;    ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched block dimensions
    int cooperativeMultiDeviceUnmatchedSharedMem;   ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched shared memories
    int isLargeBar;                  ///< 1: if it is a large PCI bar device, else 0
    int asicRevision;                ///< Revision of the GPU in this device
    int managedMemory;               ///< Device supports allocating managed memory on this system
    int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
    int concurrentManagedAccess;     ///< Device can coherently access managed memory concurrently with the CPU
    int pageableMemoryAccess;        ///< Device supports coherently accessing pageable memory
                                     ///< without calling hipHostRegister on it
    int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
 } hipDeviceProp_t;
 typedef enum HIPpointer_attribute_enum {
  HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
  HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
  HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
  HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
  HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
  HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
  HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
  HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
 } HIPpointer_attribute;
 typedef enum hipFunction_attribute {
  HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
@ -593,14 +354,6 @@ typedef enum HIPshared_carveout_enum {
  HIP_SHAREDMEM_CARVEOUT_MAX_L1 = 0,
 } HIPshared_carveout;
 typedef enum hipComputeMode {
  hipComputeModeDefault = 0,
  hipComputeModeProhibited = 2,
  hipComputeModeExclusiveProcess = 3,
 } hipComputeMode;
 typedef enum HIPmem_advise_enum {
  HIP_MEM_ADVISE_SET_READ_MOSTLY = 1,
  HIP_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
@ -802,25 +555,6 @@ typedef enum HIPdevice_P2PAttribute_enum {
 /* Handle type: pointer to struct hipGraphicsResource_st, whose definition
  * is not part of this view. */
 typedef struct hipGraphicsResource_st* hipGraphicsResource;
 /* Parameter record for a 2D memory copy: a source description, a
  * destination description and the copied extent. Field order is kept
  * exactly as before, so the layout is unchanged. */
 struct hip_Memcpy2D {
   /* Source. */
   size_t srcXInBytes, srcY;
   hipMemoryType srcMemoryType;
   const void *srcHost;
   hipDeviceptr_t srcDevice;
   hArray *srcArray;
   size_t srcPitch;

   /* Destination. */
   size_t dstXInBytes, dstY;
   hipMemoryType dstMemoryType;
   void *dstHost;
   hipDeviceptr_t dstDevice;
   hArray *dstArray;
   size_t dstPitch;

   /* Extent. */
   size_t WidthInBytes, Height;
 };
 typedef struct hip_Memcpy2D hip_Memcpy2D;
 typedef enum hipDeviceP2PAttr {
  hipDevP2PAttrPerformanceRank = 0,
  hipDevP2PAttrAccessSupported,
@ -828,60 +562,6 @@ typedef enum hipDeviceP2PAttr {
  hipDevP2PAttrHipArrayAccessSupported
 } hipDeviceP2PAttr;
 /* Parameter record for a 3D memory copy: a source description, a
  * destination description and the copied extent, all using unsigned int
  * offsets and sizes. Field order is kept exactly as before, so the layout
  * is unchanged. */
 struct HIP_MEMCPY3D {
   /* Source. */
   unsigned int srcXInBytes, srcY, srcZ, srcLOD;
   hipMemoryType srcMemoryType;
   const void *srcHost;
   hipDeviceptr_t srcDevice;
   hArray srcArray;
   unsigned int srcPitch, srcHeight;

   /* Destination. */
   unsigned int dstXInBytes, dstY, dstZ, dstLOD;
   hipMemoryType dstMemoryType;
   void *dstHost;
   hipDeviceptr_t dstDevice;
   hArray dstArray;
   unsigned int dstPitch, dstHeight;

   /* Extent. */
   unsigned int WidthInBytes, Height, Depth;
 };
 typedef struct HIP_MEMCPY3D HIP_MEMCPY3D;
 /* Parameter record for a 3D memory copy that also carries a context for
  * the source and for the destination. Field order is kept exactly as
  * before, so the layout is unchanged. */
 struct HIP_MEMCPY3D_PEER_st {
   /* Source. */
   size_t srcXInBytes, srcY, srcZ, srcLOD;
   hipMemoryType srcMemoryType;
   const void *srcHost;
   hipDeviceptr_t srcDevice;
   hArray *srcArray;
   hipCtx_t srcContext;
   size_t srcPitch, srcHeight;

   /* Destination. */
   size_t dstXInBytes, dstY, dstZ, dstLOD;
   hipMemoryType dstMemoryType;
   void *dstHost;
   hipDeviceptr_t dstDevice;
   hArray *dstArray;
   hipCtx_t dstContext;
   size_t dstPitch, dstHeight;

   /* Extent. */
   size_t WidthInBytes, Height, Depth;
 };
 typedef struct HIP_MEMCPY3D_PEER_st HIP_MEMCPY3D_PEER;
 typedef struct HIP_ARRAY_DESCRIPTOR {
  size_t Width;
  size_t Height;
@ -1051,12 +731,6 @@ typedef struct HIP_RESOURCE_VIEW_DESC_st {
  unsigned int reserved[16];
 } HIP_RESOURCE_VIEW_DESC;
 /* Pair of tokens: a 64-bit P2P token followed by a VA-space token. */
 struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st {
   unsigned long long p2pToken;
   unsigned int vaSpaceToken;
 };
 typedef struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st HIP_POINTER_ATTRIBUTE_P2P_TOKENS;
 /* Local definitions of OpenGL scalar type names - presumably so that the
  * GL headers do not have to be included here (confirm). */
 typedef unsigned int GLenum;
 typedef unsigned int GLuint;
 typedef int GLint;
@ -1073,34 +747,6 @@ typedef enum HIPGLmap_flags_enum {
  HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
 } HIPGLmap_flags;
 typedef enum hipExternalMemoryHandleType_enum {
  hipExternalMemoryHandleTypeOpaqueFd = 1,
  hipExternalMemoryHandleTypeOpaqueWin32 = 2,
  hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
  hipExternalMemoryHandleTypeD3D12Heap = 4,
  hipExternalMemoryHandleTypeD3D12Resource = 5,
  hipExternalMemoryHandleTypeD3D11Resource = 6,
  hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
 } hipExternalMemoryHandleType;
 /* Description of an external memory handle: its type, the handle itself
  * (either a file descriptor or a win32 handle/name pair, sharing storage
  * in a union), followed by a size and flags. */
 struct hipExternalMemoryHandleDesc_st {
   hipExternalMemoryHandleType type;
   union {
     int fd;
     struct {
       void *handle;
       const void *name;
     } win32;
   } handle;
   unsigned long long size;
   unsigned int flags;
 };
 typedef struct hipExternalMemoryHandleDesc_st hipExternalMemoryHandleDesc;
 /* Description of a buffer inside external memory: offset and size
  * (both 64-bit), followed by flags. */
 struct hipExternalMemoryBufferDesc_st {
   unsigned long long offset, size;
   unsigned int flags;
 };
 typedef struct hipExternalMemoryBufferDesc_st hipExternalMemoryBufferDesc;
 /**
 * hipRTC related
@ -1166,6 +812,72 @@ typedef enum hiprtcJITInputType {
 	HIPRTC_JIT_NUM_INPUT_TYPES = ( HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES + 3 )
 } hiprtcJITInputType;
 /* Pull in the ROCm-version-specific declarations: hipew5.h when the build
  * defines WITH_HIP_SDK_5, hipew6.h otherwise. */
 #ifdef WITH_HIP_SDK_5
 #include "hipew5.h"
 #else
 #include "hipew6.h"
 #endif
 /**
  * Pointer attributes: memory type, device, device/host pointers, managed
  * flag and allocation flags.
  */
 struct hipPointerAttribute_t {
   enum hipMemoryType memoryType;
   int device;
   void *devicePointer, *hostPointer;
   int isManaged;
   unsigned int allocationFlags; /* Flags specified when the memory was allocated. */
   /* peers? */
 };
 typedef struct hipPointerAttribute_t hipPointerAttribute_t;
 /* Parameter record for a 2D memory copy: a source description, a
  * destination description and the copied extent. Field order is kept
  * exactly as before, so the layout is unchanged. */
 struct hip_Memcpy2D {
   /* Source. */
   size_t srcXInBytes, srcY;
   hipMemoryType srcMemoryType;
   const void *srcHost;
   hipDeviceptr_t srcDevice;
   hArray *srcArray;
   size_t srcPitch;

   /* Destination. */
   size_t dstXInBytes, dstY;
   hipMemoryType dstMemoryType;
   void *dstHost;
   hipDeviceptr_t dstDevice;
   hArray *dstArray;
   size_t dstPitch;

   /* Extent. */
   size_t WidthInBytes, Height;
 };
 typedef struct hip_Memcpy2D hip_Memcpy2D;
 /* Parameter record for a 3D memory copy that also carries a context for
  * the source and for the destination. Field order is kept exactly as
  * before, so the layout is unchanged. */
 struct HIP_MEMCPY3D_PEER_st {
   /* Source. */
   size_t srcXInBytes, srcY, srcZ, srcLOD;
   hipMemoryType srcMemoryType;
   const void *srcHost;
   hipDeviceptr_t srcDevice;
   hArray *srcArray;
   hipCtx_t srcContext;
   size_t srcPitch, srcHeight;

   /* Destination. */
   size_t dstXInBytes, dstY, dstZ, dstLOD;
   hipMemoryType dstMemoryType;
   void *dstHost;
   hipDeviceptr_t dstDevice;
   hArray *dstArray;
   hipCtx_t dstContext;
   size_t dstPitch, dstHeight;

   /* Extent. */
   size_t WidthInBytes, Height, Depth;
 };
 typedef struct HIP_MEMCPY3D_PEER_st HIP_MEMCPY3D_PEER;
 /* Handle type: pointer to struct ihiprtcLinkState, whose definition is not
  * part of this view. */
 typedef struct ihiprtcLinkState* hiprtcLinkState;
 /* Function types. */
@ -1177,7 +889,6 @@ typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
 /* Function types (not pointers) for HIP entry points. Each `t<Name>` type
  * is used below to declare an `extern t<Name> *<Name>;` function pointer.
  * All of them return hipError_t and use the HIPAPI calling convention macro. */
 typedef hipError_t HIPAPI thipRuntimeGetVersion(int* runtimeVersion);
 typedef hipError_t HIPAPI thipGetDevice(int* device);
 typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
 typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
 typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
 typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
 typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
@ -1328,7 +1039,6 @@ extern thipDriverGetVersion *hipDriverGetVersion;
 /* Function pointers for the HIP entry points, declared here and defined
  * elsewhere. NOTE(review): presumably filled in when HIPEW loads the HIP
  * runtime library - confirm against hipew.c. */
 extern thipRuntimeGetVersion *hipRuntimeGetVersion;
 extern thipGetDevice *hipGetDevice;
 extern thipGetDeviceCount *hipGetDeviceCount;
 extern thipGetDeviceProperties *hipGetDeviceProperties;
 extern thipDeviceGet *hipDeviceGet;
 extern thipDeviceGetName *hipDeviceGetName;
 extern thipDeviceGetAttribute *hipDeviceGetAttribute;
--- a/extern/hipew/include/hipew5.h
+++ b/extern/hipew/include/hipew5.h
@ -0,0 +1,313 @@
 /*
 * Copyright 2011-2024 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */
 #ifndef __HIPEW5_H__
 #define __HIPEW5_H__
 /* File name of the HIP runtime DLL used with this (ROCm 5 flavoured) header.
  * NOTE(review): presumably the library name hipew opens on Windows -- confirm in hipew.c. */
 #define WIN_DRIVER "amdhip64.dll"
 typedef enum hipMemcpyKind {
    hipMemcpyHostToHost = 0,
    hipMemcpyHostToDevice = 1,
    hipMemcpyDeviceToHost = 2,
    hipMemcpyDeviceToDevice = 3,
    hipMemcpyDefault = 4
 } hipMemcpyKind;
 typedef enum hipMemoryType {
  hipMemoryTypeHost    = 0,
  hipMemoryTypeDevice  = 1,
  hipMemoryTypeArray   = 2,
  hipMemoryTypeUnified = 3,
  hipMemoryTypeManaged = 4
 } hipMemoryType;
 /* Two-token record: p2pToken (unsigned long long) followed by vaSpaceToken (unsigned int). */
 struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st {
   unsigned long long p2pToken;
   unsigned int       vaSpaceToken;
 };
 typedef struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st HIP_POINTER_ATTRIBUTE_P2P_TOKENS;
 /* Device attribute identifiers.
  *
  * Three numeric ranges are laid out by the explicit initializers below:
  *   - CUDA-compatible attributes start at 0 (end marker 9999),
  *   - AMD-specific attributes start at 10000 (end marker 19999),
  *   - vendor-specific attributes start at 20000.
  * Enumerators without an initializer take the previous value plus one, so the
  * order of the entries determines their numeric values: do not insert, remove
  * or reorder entries inside a range. */
 typedef enum hipDeviceAttribute_t {
  hipDeviceAttributeCudaCompatibleBegin = 0,
  hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
  hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
  hipDeviceAttributeAsyncEngineCount,                 ///< Cuda only. Asynchronous engines number.
  hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
  hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
                                                      ///< at the same virtual address as the CPU
  hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
  hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
  hipDeviceAttributeComputePreemptionSupported,       ///< Cuda only. Device supports Compute Preemption.
  hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
  hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
  hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
  hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
  hipDeviceAttributeDeviceOverlap,               ///< Cuda only. Device can concurrently copy memory and execute a kernel.
                                                      ///< Deprecated. Use instead asyncEngineCount.
  hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
                                                      ///< the device without migration
  hipDeviceAttributeGlobalL1CacheSupported,           ///< Cuda only. Device supports caching globals in L1
  hipDeviceAttributeHostNativeAtomicSupported,        ///< Cuda only. Link between the device and the host supports native atomic operations
  hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
  hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
  hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
  hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
  hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
  hipDeviceAttributeLuid,                             ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeLuidDeviceNodeMask,               ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
  hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
  hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Cuda only. Max block size per multiprocessor
  hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
  hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
  hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
  hipDeviceAttributeMaxGridDimX,                      ///< Max grid size  in width.
  hipDeviceAttributeMaxGridDimY,                      ///< Max grid size  in height.
  hipDeviceAttributeMaxGridDimZ,                      ///< Max grid size  in depth.
  hipDeviceAttributeMaxSurface1D,                     ///< Maximum size of 1D surface.
  hipDeviceAttributeMaxSurface1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered surface.
  hipDeviceAttributeMaxSurface2D,                     ///< Maximum dimension (width, height) of 2D surface.
  hipDeviceAttributeMaxSurface2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered surface.
  hipDeviceAttributeMaxSurface3D,                     ///< Maximum dimension (width, height, depth) of 3D surface.
  hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
  hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
  hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
  hipDeviceAttributeMaxTexture1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered texture.
  hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
                                                      ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
  hipDeviceAttributeMaxTexture1DMipmap,               ///< Cuda only. Maximum size of 1D mipmapped texture.
  hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
  hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension height of 2D texture.
  hipDeviceAttributeMaxTexture2DGather,               ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
  hipDeviceAttributeMaxTexture2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered texture.
  hipDeviceAttributeMaxTexture2DLinear,               ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
  hipDeviceAttributeMaxTexture2DMipmap,               ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
  hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
  hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
  hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
  hipDeviceAttributeMaxTexture3DAlt,                  ///< Cuda only. Maximum dimensions of alternate 3D texture.
  hipDeviceAttributeMaxTextureCubemap,                ///< Cuda only. Maximum dimensions of Cubemap texture
  hipDeviceAttributeMaxTextureCubemapLayered,         ///< Cuda only. Maximum dimensions of Cubemap layered texture.
  hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
  hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
  hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
  hipDeviceAttributeMaxPitch,                         ///< Maximum pitch in bytes allowed by memory copies
  hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
  hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
  hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
  hipDeviceAttributeMultiGpuBoardGroupID,             ///< Cuda only. Unique ID of device group on the same multi-GPU board
  hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
  hipDeviceAttributeName,                             ///< Device name.
  hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
                                                      ///< without calling hipHostRegister on it
  hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
  hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
  hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
  hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
  hipDeviceAttributePersistingL2CacheMaxSize,         ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
  hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
                                                      ///< by all thread blocks simultaneously resident on a multiprocessor.
  hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block.
  hipDeviceAttributeReservedSharedMemPerBlock,        ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
  hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
  hipDeviceAttributeSharedMemPerBlockOptin,           ///< Cuda only. Maximum shared memory per block usable by special opt in.
  hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Cuda only. Shared memory available per multiprocessor.
  hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
  hipDeviceAttributeStreamPrioritiesSupported,        ///< Cuda only. Whether to support stream priorities.
  hipDeviceAttributeSurfaceAlignment,                 ///< Cuda only. Alignment requirement for surfaces
  hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
  hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
  hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
  hipDeviceAttributeTotalConstantMemory,              ///< Constant memory size in bytes.
  hipDeviceAttributeTotalGlobalMem,                   ///< Global memory available on device.
  hipDeviceAttributeUnifiedAddressing,                ///< Cuda only. An unified address space shared with the host.
  hipDeviceAttributeUuid,                             ///< Cuda only. Unique ID in 16 byte.
  hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
  hipDeviceAttributeCudaCompatibleEnd = 9999,
  hipDeviceAttributeAmdSpecificBegin = 10000,
  hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
  hipDeviceAttributeArch,                                     ///< Device architecture
  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         ///< Maximum Shared Memory PerMultiprocessor.
  hipDeviceAttributeGcnArch,                                  ///< Device gcn architecture
  hipDeviceAttributeGcnArchName,                              ///< Device gcnArch name in 256 bytes
  hipDeviceAttributeHdpMemFlushCntl,                          ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpRegFlushCntl,                          ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched functions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched grid dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched block dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched shared memories
  hipDeviceAttributeIsLargeBar,                               ///< Whether it is LargeBar
  hipDeviceAttributeAsicRevision,                             ///< Revision of the GPU in this device
  hipDeviceAttributeCanUseStreamWaitValue,                    ///< '1' if Device supports hipStreamWaitValue32() and
                                                              ///< hipStreamWaitValue64() , '0' otherwise.
  hipDeviceAttributeAmdSpecificEnd = 19999,
  hipDeviceAttributeVendorSpecificBegin = 20000,
  /* NOTE(review): the next enumerator is named just `hipDeviceAttribute` and, having no
   * initializer, takes the value 20001. It looks like a truncated leftover rather than
   * an intended attribute -- confirm and remove if nothing references it. */
  hipDeviceAttribute
  // Extended attributes for vendors
 } hipDeviceAttribute_t;
 /* Device properties record; this is the type thipGetDeviceProperties() receives a pointer to.
  * This is the hipew5.h (ROCm 5) variant of the structure.
  * NOTE(review): presumably filled in by the HIP runtime, so member order, types and array
  * sizes must match the runtime's own definition exactly -- confirm before editing. */
 typedef struct hipDeviceProp_t {
    char name[256];            ///< Device name.
    size_t totalGlobalMem;     ///< Size of global memory region (in bytes).
    size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
    int regsPerBlock;          ///< Registers per block.
    int warpSize;              ///< Warp size.
    int maxThreadsPerBlock;    ///< Max work items per work group or workgroup max size.
    int maxThreadsDim[3];      ///< Max number of threads in each dimension (XYZ) of a block.
    int maxGridSize[3];        ///< Max grid dimensions (XYZ).
    int clockRate;             ///< Max clock frequency of the multiProcessors in khz.
    int memoryClockRate;       ///< Max global memory clock frequency in khz.
    int memoryBusWidth;        ///< Global memory bus width in bits.
    size_t totalConstMem;      ///< Size of constant memory region (in bytes), going by the field name.
    int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    int multiProcessorCount;          ///< Number of multi-processors (compute units).
    int l2CacheSize;                  ///< L2 cache size.
    int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
    int computeMode;                  ///< Compute mode.
    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
                               ///< instructions.  New for HIP.
    hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
    int concurrentKernels;     ///< Device can possibly execute multiple kernels concurrently.
    int pciDomainID;           ///< PCI Domain ID
    int pciBusID;              ///< PCI Bus ID.
    int pciDeviceID;           ///< PCI Device ID.
    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per Multiprocessor.
    int isMultiGpuBoard;                      ///< 1 if device is on a multi-GPU board, 0 if not.
    int canMapHostMemory;                     ///< Check whether HIP can map host memory
    int gcnArch;                              ///< DEPRECATED: use gcnArchName instead
    char gcnArchName[256];                    ///< AMD GCN Arch Name.
    int integrated;            ///< APU vs dGPU
    int cooperativeLaunch;            ///< HIP device supports cooperative launch
    int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
    int maxTexture1DLinear;    ///< Maximum size for 1D textures bound to linear memory
    int maxTexture1D;          ///< Maximum number of elements in 1D images
    int maxTexture2D[2];       ///< Maximum dimensions (width, height) of 2D images, in image elements
    int maxTexture3D[3];       ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
    unsigned int* hdpMemFlushCntl;      ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
    unsigned int* hdpRegFlushCntl;      ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
    size_t memPitch;                 ///< Maximum pitch in bytes allowed by memory copies
    size_t textureAlignment;         ///< Alignment requirement for textures
    size_t texturePitchAlignment;    ///< Pitch alignment requirement for texture references bound to pitched memory
    int kernelExecTimeoutEnabled;    ///< Run time limit for kernels executed on the device
    int ECCEnabled;                  ///< Device has ECC support enabled
    int tccDriver;                   ///< 1:If device is Tesla device using TCC driver, else 0
    int cooperativeMultiDeviceUnmatchedFunc;        ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched functions
    int cooperativeMultiDeviceUnmatchedGridDim;     ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched grid dimensions
    int cooperativeMultiDeviceUnmatchedBlockDim;    ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched block dimensions
    int cooperativeMultiDeviceUnmatchedSharedMem;   ///< HIP device supports cooperative launch on multiple
                                                    ///< devices with unmatched shared memories
    int isLargeBar;                  ///< 1: if it is a large PCI bar device, else 0
    int asicRevision;                ///< Revision of the GPU in this device
    int managedMemory;               ///< Device supports allocating managed memory on this system
    int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
    int concurrentManagedAccess;     ///< Device can coherently access managed memory concurrently with the CPU
    int pageableMemoryAccess;        ///< Device supports coherently accessing pageable memory
                                     ///< without calling hipHostRegister on it
    int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
 } hipDeviceProp_t;
 typedef enum HIPpointer_attribute_enum {
  HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
  HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
  HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
  HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
  HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
  HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
  HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
  HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
 } HIPpointer_attribute;
 typedef enum hipComputeMode {
  hipComputeModeDefault = 0,
  hipComputeModeProhibited = 2,
  hipComputeModeExclusiveProcess = 3,
 } hipComputeMode;
 /* Parameter record describing a 3D memory copy (hipew5.h variant).
  * All offsets, pitches and sizes are `unsigned int` in this variant.
  * NOTE(review): presumably passed to the HIP runtime by pointer, so member order and
  * types define a binary layout and must not change -- confirm against the ROCm 5 headers. */
 typedef struct HIP_MEMCPY3D {
  /* Source side. NOTE(review): presumably srcMemoryType selects which of
   * srcHost / srcDevice / srcArray is used -- confirm. */
  unsigned int srcXInBytes;
  unsigned int srcY;
  unsigned int srcZ;
  unsigned int srcLOD;
  hipMemoryType srcMemoryType;
  const void* srcHost;
  hipDeviceptr_t srcDevice;
  hArray srcArray;
  unsigned int srcPitch;
  unsigned int srcHeight;
  /* Destination side; same set of members as the source side, except that dstHost is not const. */
  unsigned int dstXInBytes;
  unsigned int dstY;
  unsigned int dstZ;
  unsigned int dstLOD;
  hipMemoryType dstMemoryType;
  void* dstHost;
  hipDeviceptr_t dstDevice;
  hArray dstArray;
  unsigned int dstPitch;
  unsigned int dstHeight;
  /* Size of the copied region; the width member is named in bytes. */
  unsigned int WidthInBytes;
  unsigned int Height;
  unsigned int Depth;
 } HIP_MEMCPY3D;
 typedef enum hipExternalMemoryHandleType_enum {
  hipExternalMemoryHandleTypeOpaqueFd = 1,
  hipExternalMemoryHandleTypeOpaqueWin32 = 2,
  hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
  hipExternalMemoryHandleTypeD3D12Heap = 4,
  hipExternalMemoryHandleTypeD3D12Resource = 5,
  hipExternalMemoryHandleTypeD3D11Resource = 6,
  hipExternalMemoryHandleTypeD3D11ResourceKmt = 7
 } hipExternalMemoryHandleType;
 /* Descriptor of an external memory handle: a type tag, the handle itself, a size and flags.
  * NOTE(review): presumably `type` decides which member of the `handle` union is valid
  * (fd vs. win32) -- confirm against the HIP documentation. */
 typedef struct hipExternalMemoryHandleDesc_st {
  hipExternalMemoryHandleType type;
  /* The two representations share storage. */
  union {
    int fd;
    struct {
      void *handle;
      const void *name;
    } win32;
  } handle;
  unsigned long long size;
  unsigned int flags;
 } hipExternalMemoryHandleDesc;
 /* Buffer descriptor made of an offset, a size and a flags word, in that order. */
 struct hipExternalMemoryBufferDesc_st {
   unsigned long long offset;
   unsigned long long size;
   unsigned int       flags;
 };
 typedef struct hipExternalMemoryBufferDesc_st hipExternalMemoryBufferDesc;
 /* Signature and function pointer for hipGetDeviceProperties, declared in this header
  * because it takes this header's variant of hipDeviceProp_t.
  * The pointer is only declared here (extern); its definition lives elsewhere. */
 typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
 extern thipGetDeviceProperties *hipGetDeviceProperties;
 #endif  /* __HIPEW5_H__ */
--- a/extern/hipew/include/hipew6.h
+++ b/extern/hipew/include/hipew6.h
@ -0,0 +1,462 @@
 /*
 * Copyright 2011-2024 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */
 #ifndef __HIPEW6_H__
 #define __HIPEW6_H__
 /* File name of the HIP runtime DLL used with this (ROCm 6 flavoured) header.
  * NOTE(review): presumably the library name hipew opens on Windows -- confirm in hipew.c. */
 #define WIN_DRIVER "amdhip64_6.dll"
 /* Flag constants, grouped by name prefix. */
 #define hipIpcMemLazyEnablePeerAccess 0x01
 /* hipMemAttach* flags. */
 #define hipMemAttachGlobal  0x01
 #define hipMemAttachHost    0x02
 /* hipDevice* flags. hipDeviceScheduleMask (0x7) equals Spin | Yield | BlockingSync. */
 #define hipDeviceScheduleAuto 0x0
 #define hipDeviceScheduleSpin  0x1
 #define hipDeviceScheduleYield  0x2
 #define hipDeviceScheduleBlockingSync 0x4
 #define hipDeviceScheduleMask 0x7
 #define hipDeviceMapHost 0x8
 #define hipDeviceLmemResizeToMax 0x10
 /* hipStream* flags. */
 #define hipStreamDefault  0x00
 #define hipStreamNonBlocking 0x01
 /* hipEvent* flags; the non-zero values are distinct single bits. */
 #define hipEventDefault 0x0
 #define hipEventBlockingSync 0x1
 #define hipEventDisableTiming  0x2
 #define hipEventInterprocess 0x4
 /* hipOccupancy* flags. */
 #define hipOccupancyDefault 0x00
 typedef enum hipMemcpyKind {
    hipMemcpyHostToHost = 0,
    hipMemcpyHostToDevice = 1,
    hipMemcpyDeviceToHost = 2,
    hipMemcpyDeviceToDevice = 3,
    hipMemcpyDefault = 4,
    hipMemcpyDeviceToDeviceNoCU = 1024
 } hipMemcpyKind;
 typedef enum hipMemoryType {  
    hipMemoryTypeUnregistered = 0,
    hipMemoryTypeHost         = 1,
    hipMemoryTypeDevice       = 2,
    hipMemoryTypeManaged      = 3,
    hipMemoryTypeArray        = 10,
    hipMemoryTypeUnified      = 11
 } hipMemoryType;
 typedef enum hipDeviceAttribute_t {
  hipDeviceAttributeCudaCompatibleBegin = 0,
  hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
  hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
  hipDeviceAttributeAsyncEngineCount,                 ///< Cuda only. Asynchronous engines number.
  hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
  hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
                                                      ///< at the same virtual address as the CPU
  hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
  hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
  hipDeviceAttributeComputePreemptionSupported,       ///< Cuda only. Device supports Compute Preemption.
  hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
  hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
  hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
  hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
  hipDeviceAttributeDeviceOverlap,               ///< Cuda only. Device can concurrently copy memory and execute a kernel.  
                                                      ///< Deprecated. Use instead asyncEngineCount.
  hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
                                                      ///< the device without migration
  hipDeviceAttributeGlobalL1CacheSupported,           ///< Cuda only. Device supports caching globals in L1
  hipDeviceAttributeHostNativeAtomicSupported,        ///< Cuda only. Link between the device and the host supports native atomic operations
  hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
  hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
  hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
  hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
  hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
  hipDeviceAttributeLuid,                             ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeLuidDeviceNodeMask,               ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
  hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
  hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Cuda only. Max block size per multiprocessor
  hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
  hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
  hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
  hipDeviceAttributeMaxGridDimX,                      ///< Max grid size  in width.
  hipDeviceAttributeMaxGridDimY,                      ///< Max grid size  in height.
  hipDeviceAttributeMaxGridDimZ,                      ///< Max grid size  in depth.
  hipDeviceAttributeMaxSurface1D,                     ///< Maximum size of 1D surface.
  hipDeviceAttributeMaxSurface1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered surface.
  hipDeviceAttributeMaxSurface2D,                     ///< Maximum dimension (width, height) of 2D surface.
  hipDeviceAttributeMaxSurface2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered surface.
  hipDeviceAttributeMaxSurface3D,                     ///< Maximum dimension (width, height, depth) of 3D surface.
  hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
  hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
  hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
  hipDeviceAttributeMaxTexture1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered texture.
  hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
                                                      ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
  hipDeviceAttributeMaxTexture1DMipmap,               ///< Cuda only. Maximum size of 1D mipmapped texture.
  hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
  hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension hight of 2D texture.
  hipDeviceAttributeMaxTexture2DGather,               ///< Cuda only. Maximum dimensions of 2D texture if gather operations  performed.
  hipDeviceAttributeMaxTexture2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered texture.
  hipDeviceAttributeMaxTexture2DLinear,               ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
  hipDeviceAttributeMaxTexture2DMipmap,               ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
  hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
  hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
  hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
  hipDeviceAttributeMaxTexture3DAlt,                  ///< Cuda only. Maximum dimensions of alternate 3D texture.
  hipDeviceAttributeMaxTextureCubemap,                ///< Cuda only. Maximum dimensions of Cubemap texture
  hipDeviceAttributeMaxTextureCubemapLayered,         ///< Cuda only. Maximum dimensions of Cubemap layered texture.
  hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
  hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
  hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
  hipDeviceAttributeMaxPitch,                         ///< Maximum pitch in bytes allowed by memory copies
  hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
  hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
  hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
  hipDeviceAttributeMultiGpuBoardGroupID,             ///< Cuda only. Unique ID of device group on the same multi-GPU board
  hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
  hipDeviceAttributeUnused1,                          ///< Previously hipDeviceAttributeName
  hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
                                                      ///< without calling hipHostRegister on it
  hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
  hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
  hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
  hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
  hipDeviceAttributePersistingL2CacheMaxSize,         ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
  hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
                                                      ///< by all thread blocks simultaneously resident on a multiprocessor.
  hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block.
  hipDeviceAttributeReservedSharedMemPerBlock,        ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
  hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
  hipDeviceAttributeSharedMemPerBlockOptin,           ///< Cuda only. Maximum shared memory per block usable by special opt in.
  hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Cuda only. Shared memory available per multiprocessor.
  hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
  hipDeviceAttributeStreamPrioritiesSupported,        ///< Cuda only. Whether to support stream priorities.
  hipDeviceAttributeSurfaceAlignment,                 ///< Cuda only. Alignment requirement for surfaces
  hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
  hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
  hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
  hipDeviceAttributeTotalConstantMemory,              ///< Constant memory size in bytes.
  hipDeviceAttributeTotalGlobalMem,                   ///< Global memory available on devicice.
  hipDeviceAttributeUnifiedAddressing,                ///< Cuda only. An unified address space shared with the host.
  hipDeviceAttributeUnused2,                          ///< Previously hipDeviceAttributeUuid
  hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
  hipDeviceAttributeMemoryPoolsSupported,             ///< Device supports HIP Stream Ordered Memory Allocator
  hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
  hipDeviceAttributeHostRegisterSupported,            ///< Can device support host memory registration via hipHostRegister
  hipDeviceAttributeMemoryPoolSupportedHandleTypes,   ///< Supported handle mask for HIP Stream Ordered Memory Allocator
  hipDeviceAttributeCudaCompatibleEnd = 9999,
  hipDeviceAttributeAmdSpecificBegin = 10000,
  hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
  hipDeviceAttributeUnused3,                                  ///< Previously hipDeviceAttributeArch
  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         ///< Maximum Shared Memory PerMultiprocessor.
  hipDeviceAttributeUnused4,                                  ///< Previously hipDeviceAttributeGcnArch
  hipDeviceAttributeUnused5,                                  ///< Previously hipDeviceAttributeGcnArchName
  hipDeviceAttributeHdpMemFlushCntl,                          ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpRegFlushCntl,                          ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched functions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched grid dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched block dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
                                                              ///< devices with unmatched shared memories
  hipDeviceAttributeIsLargeBar,                               ///< Whether it is LargeBar
  hipDeviceAttributeAsicRevision,                             ///< Revision of the GPU in this device
  hipDeviceAttributeCanUseStreamWaitValue,                    ///< '1' if Device supports hipStreamWaitValue32() and
                                                              ///< hipStreamWaitValue64() , '0' otherwise.
  hipDeviceAttributeAmdSpecificEnd = 19999,
  hipDeviceAttributeVendorSpecificBegin = 20000,
  hipDeviceAttribute
  // Extended attributes for vendors
 } hipDeviceAttribute_t;
 /* Opaque 16-byte UUID value; used as the type of the `uuid` member of hipDeviceProp_t. */
 typedef struct hipUUID_t {
    char bytes[16];
 } hipUUID;
 /* Device properties structure taken by hipGetDevicePropertiesR0600() (declared further down in
  * this header).
  * NOTE(review): presumably the member order and sizes must mirror the ROCm 6 runtime's own
  * hipDeviceProp_t exactly -- confirm against the ROCm 6 headers before reordering, adding or
  * removing any member. */
 typedef struct hipDeviceProp_t {
    char name[256];                   ///< Device name.
    hipUUID uuid;                     ///< UUID of a device
    char luid[8];                     ///< 8-byte unique identifier. Only valid on windows
    unsigned int luidDeviceNodeMask;  ///< LUID node mask
    size_t totalGlobalMem;            ///< Size of global memory region (in bytes).
    size_t sharedMemPerBlock;         ///< Size of shared memory per block (in bytes).
    int regsPerBlock;                 ///< Registers per block.
    int warpSize;                     ///< Warp size.
    size_t memPitch;                  ///< Maximum pitch in bytes allowed by memory copies
                                      ///< pitched memory
    int maxThreadsPerBlock;           ///< Max work items per work group or workgroup max size.
    int maxThreadsDim[3];             ///< Max number of threads in each dimension (XYZ) of a block.
    int maxGridSize[3];               ///< Max grid dimensions (XYZ).
    int clockRate;                    ///< Max clock frequency of the multiProcessors in khz.
    size_t totalConstMem;             ///< Size of shared constant memory region on the device
                                      ///< (in bytes).
    int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                ///< feature caps.
    size_t textureAlignment;       ///< Alignment requirement for textures
    size_t texturePitchAlignment;  ///< Pitch alignment requirement for texture references bound to
                                   ///< pitched memory
    int deviceOverlap;             ///< Deprecated. Use asyncEngineCount instead
    int multiProcessorCount;       ///< Number of multi-processors (compute units).
    int kernelExecTimeoutEnabled;  ///< Run time limit for kernels executed on the device
    int integrated;                ///< APU vs dGPU
    int canMapHostMemory;          ///< Check whether HIP can map host memory
    int computeMode;               ///< Compute mode.
    int maxTexture1D;              ///< Maximum number of elements in 1D images
    int maxTexture1DMipmap;        ///< Maximum 1D mipmap texture size
    int maxTexture1DLinear;        ///< Maximum size for 1D textures bound to linear memory
    int maxTexture2D[2];  ///< Maximum dimensions (width, height) of 2D images, in image elements
    int maxTexture2DMipmap[2];  ///< Maximum number of elements in 2D array mipmap of images
    int maxTexture2DLinear[3];  ///< Maximum 2D tex dimensions if tex are bound to pitched memory
    int maxTexture2DGather[2];  ///< Maximum 2D tex dimensions if gather has to be performed
    int maxTexture3D[3];  ///< Maximum dimensions (width, height, depth) of 3D images, in image
                          ///< elements
    int maxTexture3DAlt[3];           ///< Maximum alternate 3D texture dims
    int maxTextureCubemap;            ///< Maximum cubemap texture dims
    int maxTexture1DLayered[2];       ///< Maximum number of elements in 1D array images
    int maxTexture2DLayered[3];       ///< Maximum number of elements in 2D array images
    int maxTextureCubemapLayered[2];  ///< Maximum cubemaps layered texture dims
    int maxSurface1D;                 ///< Maximum 1D surface size
    int maxSurface2D[2];              ///< Maximum 2D surface size
    int maxSurface3D[3];              ///< Maximum 3D surface size
    int maxSurface1DLayered[2];       ///< Maximum 1D layered surface size
    int maxSurface2DLayered[3];       ///< Maximum 2D layered surface size
    int maxSurfaceCubemap;            ///< Maximum cubemap surface size
    int maxSurfaceCubemapLayered[2];  ///< Maximum cubemap layered surface size
    size_t surfaceAlignment;          ///< Alignment requirement for surface
    int concurrentKernels;         ///< Device can possibly execute multiple kernels concurrently.
    int ECCEnabled;                ///< Device has ECC support enabled
    int pciBusID;                  ///< PCI Bus ID.
    int pciDeviceID;               ///< PCI Device ID.
    int pciDomainID;               ///< PCI Domain ID
    int tccDriver;                 ///< 1:If device is Tesla device using TCC driver, else 0
    int asyncEngineCount;          ///< Number of async engines
    int unifiedAddressing;         ///< Does device and host share unified address space
    int memoryClockRate;           ///< Max global memory clock frequency in khz.
    int memoryBusWidth;            ///< Global memory bus width in bits.
    int l2CacheSize;               ///< L2 cache size.
    int persistingL2CacheMaxSize;  ///< Device's max L2 persisting lines in bytes
    int maxThreadsPerMultiProcessor;    ///< Maximum resident threads per multi-processor.
    int streamPrioritiesSupported;      ///< Device supports stream priority
    int globalL1CacheSupported;         ///< Indicates globals are cached in L1
    int localL1CacheSupported;          ///< Locals are cached in L1
    size_t sharedMemPerMultiprocessor;  ///< Amount of shared memory available per multiprocessor.
    int regsPerMultiprocessor;          ///< registers available per multiprocessor
    int managedMemory;         ///< Device supports allocating managed memory on this system
    int isMultiGpuBoard;       ///< 1 if device is on a multi-GPU board, 0 if not.
    int multiGpuBoardGroupID;  ///< Unique identifier for a group of devices on same multiboard GPU
    int hostNativeAtomicSupported;         ///< Link between host and device supports native atomics
    int singleToDoublePrecisionPerfRatio;  ///< Deprecated. CUDA only.
    int pageableMemoryAccess;              ///< Device supports coherently accessing pageable memory
                                           ///< without calling hipHostRegister on it
    int concurrentManagedAccess;  ///< Device can coherently access managed memory concurrently with
                                  ///< the CPU
    int computePreemptionSupported;         ///< Is compute preemption supported on the device
    int canUseHostPointerForRegisteredMem;  ///< Device can access host registered memory with same
                                            ///< address as the host
    int cooperativeLaunch;                  ///< HIP device supports cooperative launch
    int cooperativeMultiDeviceLaunch;       ///< HIP device supports cooperative launch on multiple
                                            ///< devices
    size_t
        sharedMemPerBlockOptin;  ///< Per device max shared mem per block usable by special opt in
    int pageableMemoryAccessUsesHostPageTables;  ///< Device accesses pageable memory via the host's
                                                 ///< page tables
    int directManagedMemAccessFromHost;  ///< Host can directly access managed memory on the device
                                         ///< without migration
    int maxBlocksPerMultiProcessor;      ///< Max number of blocks on CU
    int accessPolicyMaxWindowSize;       ///< Max value of access policy window
    size_t reservedSharedMemPerBlock;    ///< Shared memory reserved by driver per block
    int hostRegisterSupported;           ///< Device supports hipHostRegister
    int sparseHipArraySupported;         ///< Indicates if device supports sparse hip arrays
    int hostRegisterReadOnlySupported;   ///< Device supports using the hipHostRegisterReadOnly flag
                                         ///< with hipHostRegister
    int timelineSemaphoreInteropSupported;  ///< Indicates external timeline semaphore support
    int memoryPoolsSupported;  ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
    int gpuDirectRDMASupported;                    ///< Indicates device support of RDMA APIs
    unsigned int gpuDirectRDMAFlushWritesOptions;  ///< Bitmask to be interpreted according to
                                                   ///< hipFlushGPUDirectRDMAWritesOptions
    int gpuDirectRDMAWritesOrdering;               ///< value of hipGPUDirectRDMAWritesOrdering
    unsigned int
        memoryPoolSupportedHandleTypes;  ///< Bitmask of handle types support with mempool based IPC
    int deferredMappingHipArraySupported;  ///< Device supports deferred mapping HIP arrays and HIP
                                           ///< mipmapped arrays
    int ipcEventSupported;                 ///< Device supports IPC events
    int clusterLaunch;                     ///< Device supports cluster launch
    int unifiedFunctionPointers;           ///< Indicates device supports unified function pointers
    int reserved[63];                      ///< CUDA Reserved.
    int hipReserved[32];  ///< Reserved for adding new entries for HIP/CUDA.
    /* HIP Only struct members */
    char gcnArchName[256];                    ///< AMD GCN Arch Name. HIP Only.
    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per CU. HIP Only.
    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
                               ///< instructions.  New for HIP.
    hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
    unsigned int* hdpMemFlushCntl;            ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
    unsigned int* hdpRegFlushCntl;            ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
    int cooperativeMultiDeviceUnmatchedFunc;  ///< HIP device supports cooperative launch on
                                              ///< multiple
                                              ///< devices with unmatched functions
    int cooperativeMultiDeviceUnmatchedGridDim;    ///< HIP device supports cooperative launch on
                                                   ///< multiple
                                                   ///< devices with unmatched grid dimensions
    int cooperativeMultiDeviceUnmatchedBlockDim;   ///< HIP device supports cooperative launch on
                                                   ///< multiple
                                                   ///< devices with unmatched block dimensions
    int cooperativeMultiDeviceUnmatchedSharedMem;  ///< HIP device supports cooperative launch on
                                                   ///< multiple
                                                   ///< devices with unmatched shared memories
    int isLargeBar;                                ///< 1: if it is a large PCI bar device, else 0
    int asicRevision;                              ///< Revision of the GPU in this device
 } hipDeviceProp_t;
 /* Pointer attribute identifiers. Numbering starts at 1 and every following enumerator takes the
  * next implicit value, so the declaration order below defines the numeric values. */
 typedef enum HIPpointer_attribute_enum {
  HIP_POINTER_ATTRIBUTE_CONTEXT = 1,   ///< The context on which a pointer was allocated
                                         ///< @warning - not supported in HIP
    HIP_POINTER_ATTRIBUTE_MEMORY_TYPE,   ///< memory type describing location of a pointer
    HIP_POINTER_ATTRIBUTE_DEVICE_POINTER,///< address at which the pointer is allocated on device
    HIP_POINTER_ATTRIBUTE_HOST_POINTER,  ///< address at which the pointer is allocated on host
    HIP_POINTER_ATTRIBUTE_P2P_TOKENS,    ///< A pair of tokens for use with linux kernel interface
                                         ///< @warning - not supported in HIP
    HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS,   ///< Synchronize every synchronous memory operation
                                         ///< initiated on this region
    HIP_POINTER_ATTRIBUTE_BUFFER_ID,     ///< Unique ID for an allocated memory region
    HIP_POINTER_ATTRIBUTE_IS_MANAGED,    ///< Indicates if the pointer points to managed memory
    HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL,///< device ordinal of a device on which a pointer
                                         ///< was allocated or registered
    HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation
                                                     ///< that is suitable for hipIpcGetMemHandle
                                                     ///< @warning - not supported in HIP
    HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR,///< Starting address for this requested pointer
    HIP_POINTER_ATTRIBUTE_RANGE_SIZE,      ///< Size of the address range for this requested pointer
    HIP_POINTER_ATTRIBUTE_MAPPED,          ///< tells if this pointer is in a valid address range
                                           ///< that is mapped to a backing allocation
    HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,///< Bitmask of allowed hipmemAllocationHandleType
                                           ///< for this allocation @warning - not supported in HIP
    HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by
                                           ///< this pointer can be used with the GPUDirect RDMA API
                                           ///< @warning - not supported in HIP
    HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS,    ///< Returns the access flags the device associated with
                                           ///< for the corresponding memory referenced by the ptr
    HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE   ///< Returns the mempool handle for the allocation if
                                           ///< it was allocated from a mempool
                                           ///< @warning - not supported in HIP
 } HIPpointer_attribute;
 /* Compute mode identifiers with explicit values 0..3.
  * NOTE(review): presumably these are the values reported through hipDeviceProp_t::computeMode --
  * confirm against the HIP runtime documentation. */
 typedef enum hipComputeMode {
  hipComputeModeDefault = 0,
  hipComputeModeExclusive = 1,
  hipComputeModeProhibited = 2,
  hipComputeModeExclusiveProcess = 3,
 } hipComputeMode;
 /* Parameter block for a 3D memory copy. The `src*` members describe the source, the `dst*`
  * members describe the destination, and WidthInBytes / Height / Depth give the copied extent.
  * NOTE(review): presumably only the host / device / array member matching the respective
  * `*MemoryType` is consulted by the runtime -- confirm against the HIP documentation. */
 typedef struct HIP_MEMCPY3D {
  size_t srcXInBytes;
  size_t srcY;
  size_t srcZ;
  size_t srcLOD;
  hipMemoryType srcMemoryType;
  const void* srcHost;
  hipDeviceptr_t srcDevice;
  hArray srcArray;
  size_t srcPitch;
  size_t srcHeight;
  size_t dstXInBytes;
  size_t dstY;
  size_t dstZ;
  size_t dstLOD;
  hipMemoryType dstMemoryType;
  void* dstHost;
  hipDeviceptr_t dstDevice;
  hArray dstArray;
  size_t dstPitch;
  size_t dstHeight;
  size_t WidthInBytes;
  size_t Height;
  size_t Depth;
 } HIP_MEMCPY3D;
 #if 0 // Disabled on purpose: the ROCm 5 and ROCm 6 definitions of hipTextureDesc are identical, but the structure hipew already uses matches neither of them (it matches the one in the CUDA wrangler instead).
 // TODO: investigate this mismatch further before enabling the definition below.
 typedef struct hipTextureDesc {
    enum hipTextureAddressMode addressMode[3];  // Texture address mode for up to 3 dimensions
    enum hipTextureFilterMode filterMode;
    enum hipTextureReadMode readMode;
    int sRGB;  // Perform sRGB->linear conversion during texture read
    float borderColor[4];
    int normalizedCoords;
    unsigned int maxAnisotropy;
    enum hipTextureFilterMode mipmapFilterMode;
    float mipmapLevelBias;
    float minMipmapLevelClamp;
    float maxMipmapLevelClamp;
 }hipTextureDesc;
 #endif
 /* Kinds of external memory handle; used as the type of the `type` member of
  * hipExternalMemoryHandleDesc. Values are explicit and run from 1 to 8. */
 typedef enum hipExternalMemoryHandleType_enum {
  hipExternalMemoryHandleTypeOpaqueFd = 1,
  hipExternalMemoryHandleTypeOpaqueWin32 = 2,
  hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
  hipExternalMemoryHandleTypeD3D12Heap = 4,
  hipExternalMemoryHandleTypeD3D12Resource = 5,
  hipExternalMemoryHandleTypeD3D11Resource = 6,
  hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
  hipExternalMemoryHandleTypeNvSciBuf         = 8
 } hipExternalMemoryHandleType;
 /* Description of an external memory handle. The `handle` union carries the handle either as a
  * file descriptor (`fd`), a Win32 handle/name pair (`win32`) or an NvSciBuf object pointer
  * (`nvSciBufObject`).
  * NOTE(review): presumably `type` selects which union member is valid -- confirm against the
  * HIP external memory documentation. */
 typedef struct hipExternalMemoryHandleDesc_st {
  hipExternalMemoryHandleType type;
  union {
    int fd;
    struct {
      void *handle;
      const void *name;
    } win32;
    const void *nvSciBufObject;
  } handle;
  unsigned long long size;
  unsigned int flags;
  unsigned int reserved[16];
 } hipExternalMemoryHandleDesc;
 /* Description of a buffer inside an external memory object: an `offset` / `size` pair plus
  * `flags` and reserved padding.
  * NOTE(review): presumably offset and size are expressed in bytes -- confirm against the HIP
  * external memory documentation. */
 typedef struct hipExternalMemoryBufferDesc_st {
  unsigned long long offset;
  unsigned long long size;
  unsigned int flags;
  unsigned int reserved[16];
 } hipExternalMemoryBufferDesc;
 /* Function type and global function pointer for the runtime's hipGetDevicePropertiesR0600 entry
  * point, which takes the hipDeviceProp_t layout declared in this header. The macro below
  * redirects the plain hipGetDeviceProperties name to that pointer, so callers using the plain
  * name end up calling the R0600 entry point. */
 typedef hipError_t HIPAPI thipGetDevicePropertiesR0600(hipDeviceProp_t* props, int deviceId);
 extern thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600;
 #define hipGetDeviceProperties hipGetDevicePropertiesR0600
 #endif  /* __HIPEW_H__ */
--- a/extern/hipew/src/hipew.c
+++ b/extern/hipew/src/hipew.c
@ -38,7 +38,11 @@ thipDriverGetVersion *hipDriverGetVersion;
 thipRuntimeGetVersion *hipRuntimeGetVersion;
 thipGetDevice *hipGetDevice;
 thipGetDeviceCount *hipGetDeviceCount;
-thipGetDeviceProperties *hipGetDeviceProperties;
+#ifdef WITH_HIP_SDK_5
  thipGetDeviceProperties *hipGetDeviceProperties;
 #else
  thipGetDevicePropertiesR0600 *hipGetDevicePropertiesR0600;
 #endif
 thipDeviceGet* hipDeviceGet;
 thipDeviceGetName *hipDeviceGetName;
 thipDeviceGetAttribute *hipDeviceGetAttribute;
@ -233,19 +237,25 @@ static int hipewHipInit(void) {
  /* Library paths. */
 #ifdef _WIN32
  /* Expected in C:/Windows/System32 or similar, no path needed. */
-  const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL};
+  const char *hip_paths[] = {WIN_DRIVER, NULL};
 #elif defined(__APPLE__)
  /* Default installation path. */
  const char *hip_paths[] = {"", NULL};
 #else
  /* ROCm 6 changes paths from /opt/rocm/hip/lib to /opt/rocm/lib, so
   * search for libraries there. It still includes .so.5. */
  #ifdef WITH_HIP_SDK_5
      const char *hip_paths[] = {"libamdhip64.so.5",
                               "/opt/rocm/lib/libamdhip64.so.5",
                               "/opt/rocm/hip/lib/libamdhip64.so.5",
-                             "libamdhip64.so",
+                                NULL};
-                             "/opt/rocm/lib/libamdhip64.so",
+  #else
-                             "/opt/rocm/hip/lib/libamdhip64.so", NULL};
+  const char *hip_paths[] = {"libamdhip64.so.6",
                              "/opt/rocm/lib/libamdhip64.so.6",
                              "/opt/rocm/hip/lib/libamdhip64.so.6",
                               NULL};
  #endif
 #endif
  static int initialized = 0;
  static int result = 0;
@ -280,6 +290,11 @@ static int hipewHipInit(void) {
  }
  /* Fetch all function pointers. */
 #ifdef WITH_HIP_SDK_5
  HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
 #else
  HIP_LIBRARY_FIND_CHECKED(hipGetDevicePropertiesR0600);
 #endif
  HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
  HIP_LIBRARY_FIND_CHECKED(hipGetErrorString);
  HIP_LIBRARY_FIND_CHECKED(hipGetLastError);
@ -288,7 +303,6 @@ static int hipewHipInit(void) {
  HIP_LIBRARY_FIND_CHECKED(hipRuntimeGetVersion);
  HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
  HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
  HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
  HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
@ -411,28 +425,6 @@ static int hipewHipInit(void) {
  return result;
 }
 hipMemoryType get_hip_memory_type(hipMemoryType mem_type, int runtime_version) {
  /** Convert hipMemoryType for backwards compatibility with rocm5/6.
   * This can be removed when support for ROCm 5 is removed.
   *
   * \param mem_type: Memory type expressed with the current (non `_v5`) enumerators.
   * \param runtime_version: Version number of the loaded HIP runtime.
   * \return `mem_type` unchanged for ROCm 6 and newer runtimes, otherwise the matching `_v5`
   * enumerator; hipMemoryTypeUnregistered when `mem_type` has no `_v5` counterpart. */
  /* 60000000 is the start of ROCm 6, so the boundary value itself must already take the
   * ROCm 6 path; only versions strictly below it need the old enum values. */
  if (runtime_version >= 60000000) {
    return mem_type;
  }
  switch (mem_type) {
    case hipMemoryTypeHost:
      return hipMemoryTypeHost_v5;
    case hipMemoryTypeDevice:
      return hipMemoryTypeDevice_v5;
    case hipMemoryTypeArray:
      return hipMemoryTypeArray_v5;
    case hipMemoryTypeUnified:
      return hipMemoryTypeUnified_v5;
    default:
      return hipMemoryTypeUnregistered;  /* This should not happen. */
  }
 }
 int hipewInit(hipuint32_t flags) {
  int result = HIPEW_SUCCESS;
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@ -1749,7 +1749,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                    col.label(text=rpt_("and AMD Radeon Pro %s driver or newer") % driver_version,
                              icon='BLANK1', translate=False)
                elif sys.platform.startswith("linux"):
-                    driver_version = "22.10"
+                    driver_version = "23.40"
                    col.label(
                        text=rpt_("Requires AMD GPU with RDNA architecture"),
                        icon='BLANK1',
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@ -313,16 +313,17 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
    return string();
  }
-  const int hipcc_hip_version = hipewCompilerVersion();
+#  ifdef WITH_HIP_SDK_5
-  VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
+  int hip_major_ver = hipRuntimeVersion / 10000000;
-  if (hipcc_hip_version < 40) {
+  if (hip_major_ver > 5) {
-    printf(
+    set_error(string_printf(
-        "Unsupported HIP version %d.%d detected, "
+        "HIP Runtime version %d does not work with kernels compiled with HIP SDK 5\n",
-        "you need HIP 4.0 or newer.\n",
+        hip_major_ver));
        hipcc_hip_version / 10,
        hipcc_hip_version % 10);
    return string();
  }
 #  endif
  const int hipcc_hip_version = hipewCompilerVersion();
  VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
  double starttime = time_dt();
@ -755,9 +756,9 @@ void HIPDevice::tex_alloc(device_texture &mem)
    HIP_MEMCPY3D param;
    memset(&param, 0, sizeof(HIP_MEMCPY3D));
-    param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
+    param.dstMemoryType = hipMemoryTypeArray;
    param.dstArray = array_3d;
-    param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
+    param.srcMemoryType = hipMemoryTypeHost;
    param.srcHost = mem.host_pointer;
    param.srcPitch = src_pitch;
    param.WidthInBytes = param.srcPitch;
@ -787,10 +788,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
    hip_Memcpy2D param;
    memset(&param, 0, sizeof(param));
-    param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
+    param.dstMemoryType = hipMemoryTypeDevice;
    param.dstDevice = mem.device_pointer;
    param.dstPitch = dst_pitch;
-    param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
+    param.srcMemoryType = hipMemoryTypeHost;
    param.srcHost = mem.host_pointer;
    param.srcPitch = src_pitch;
    param.WidthInBytes = param.srcPitch;
@ -978,11 +979,6 @@ int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int
  return value;
 }
 /* Translate `mem_type` for the loaded runtime by forwarding it, together with
  * hipRuntimeVersion, to get_hip_memory_type(). */
 hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
 {
  const hipMemoryType runtime_mem_type = get_hip_memory_type(mem_type, hipRuntimeVersion);
  return runtime_mem_type;
 }
 CCL_NAMESPACE_END
 #endif
--- a/intern/cycles/device/hip/device_impl.h
+++ b/intern/cycles/device/hip/device_impl.h
@ -103,7 +103,6 @@ class HIPDevice : public GPUDevice {
 protected:
  bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
  int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
  hipMemoryType get_memory_type(hipMemoryType mem_type);
 };
 CCL_NAMESPACE_END
--- a/lib/linux_x64
+++ b/lib/linux_x64
@ -1 +1 @@
-Subproject commit a2917f7e221fc1f81c4f018788ff51c619417413
+Subproject commit 4f7c423914f5c422bd4709e9e5a969fd87bb855e
		`@ -1 +1 @@`
			`Subproject commit a2917f7e221fc1f81c4f018788ff51c619417413`				`Subproject commit 4f7c423914f5c422bd4709e9e5a969fd87bb855e`